spark git commit: [SPARK-15706][SQL] Fix Wrong Answer when using IF NOT EXISTS in INSERT OVERWRITE for DYNAMIC PARTITION

2016-06-16 Thread yhuai
Repository: spark
Updated Branches:
  refs/heads/master 5ada60614 -> e5d703bca


[SPARK-15706][SQL] Fix Wrong Answer when using IF NOT EXISTS in INSERT 
OVERWRITE for DYNAMIC PARTITION

## What changes were proposed in this pull request?
`IF NOT EXISTS` in `INSERT OVERWRITE` should not support dynamic partitions: if
`IF NOT EXISTS` is specified, the inserted data never shows up in the table.

This PR issues an exception in this case, just as Hive does. It also issues an
exception when users specify `IF NOT EXISTS` without any `PARTITION`
specification.
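
For illustration only, a minimal sketch of the statements affected by this change
(table and column names are made up, and `spark` is assumed to be a `SparkSession`);
the second and third statements are now rejected at parse time:

```scala
// Static partition value plus IF NOT EXISTS: still parses as before.
spark.sql(
  "INSERT OVERWRITE TABLE logs PARTITION (ds = '2016-06-16') IF NOT EXISTS " +
  "SELECT * FROM staging")

// Dynamic partition (no value for ds) plus IF NOT EXISTS: now throws a ParseException.
spark.sql(
  "INSERT OVERWRITE TABLE logs PARTITION (ds) IF NOT EXISTS " +
  "SELECT * FROM staging")

// IF NOT EXISTS without any PARTITION specification: also rejected by the new grammar.
spark.sql("INSERT OVERWRITE TABLE logs IF NOT EXISTS SELECT * FROM staging")
```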

## How was this patch tested?
Added test cases to `PlanParserSuite` and `InsertIntoHiveTableSuite`.

Author: gatorsmile 

Closes #13447 from gatorsmile/insertIfNotExist.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e5d703bc
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e5d703bc
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e5d703bc

Branch: refs/heads/master
Commit: e5d703bca85c65ce329b1e202283cfa35d109146
Parents: 5ada606
Author: gatorsmile 
Authored: Thu Jun 16 22:54:02 2016 -0700
Committer: Yin Huai 
Committed: Thu Jun 16 22:54:02 2016 -0700

--
 .../apache/spark/sql/catalyst/parser/SqlBase.g4 |  2 +-
 .../spark/sql/catalyst/parser/AstBuilder.scala  |  6 ++
 .../plans/logical/basicLogicalOperators.scala   |  1 +
 .../sql/catalyst/parser/PlanParserSuite.scala   | 13 ++--
 .../sql/hive/InsertIntoHiveTableSuite.scala | 68 
 5 files changed, 85 insertions(+), 5 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/e5d703bc/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
--
diff --git 
a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 
b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
index b603196..23e925e 100644
--- 
a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
+++ 
b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
@@ -203,7 +203,7 @@ query
 ;
 
 insertInto
-: INSERT OVERWRITE TABLE tableIdentifier partitionSpec? (IF NOT EXISTS)?
+: INSERT OVERWRITE TABLE tableIdentifier (partitionSpec (IF NOT EXISTS)?)?
 | INSERT INTO TABLE? tableIdentifier partitionSpec?
 ;
 

http://git-wip-us.apache.org/repos/asf/spark/blob/e5d703bc/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
--
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
index e380643..c7420a1 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
@@ -171,6 +171,12 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with 
Logging {
 val tableIdent = visitTableIdentifier(ctx.tableIdentifier)
 val partitionKeys = 
Option(ctx.partitionSpec).map(visitPartitionSpec).getOrElse(Map.empty)
 
+val dynamicPartitionKeys = partitionKeys.filter(_._2.isEmpty)
+if (ctx.EXISTS != null && dynamicPartitionKeys.nonEmpty) {
+  throw new ParseException(s"Dynamic partitions do not support IF NOT 
EXISTS. Specified " +
+"partitions with value: " + dynamicPartitionKeys.keys.mkString("[", 
",", "]"), ctx)
+}
+
 InsertIntoTable(
   UnresolvedRelation(tableIdent, None),
   partitionKeys,

http://git-wip-us.apache.org/repos/asf/spark/blob/e5d703bc/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala
--
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala
index 898784d..6c3eb3a 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala
@@ -377,6 +377,7 @@ case class InsertIntoTable(
   }
 
   assert(overwrite || !ifNotExists)
+  assert(partition.values.forall(_.nonEmpty) || !ifNotExists)
   override lazy val resolved: Boolean =
 childrenResolved && table.resolved && expectedColumns.forall { expected =>
 child.output.size == expected.size && child.output.zip(expected).forall {

http://git-wip-us.apache.org/repos/asf/spark/blob/e5d703bc/sql/catalyst/src/test/scala/or

spark git commit: [SPARK-15706][SQL] Fix Wrong Answer when using IF NOT EXISTS in INSERT OVERWRITE for DYNAMIC PARTITION

2016-06-16 Thread yhuai
Repository: spark
Updated Branches:
  refs/heads/branch-2.0 3994372f4 -> b82abde06


[SPARK-15706][SQL] Fix Wrong Answer when using IF NOT EXISTS in INSERT 
OVERWRITE for DYNAMIC PARTITION

## What changes were proposed in this pull request?
`IF NOT EXISTS` in `INSERT OVERWRITE` should not support dynamic partitions: if
`IF NOT EXISTS` is specified, the inserted data never shows up in the table.

This PR issues an exception in this case, just as Hive does. It also issues an
exception when users specify `IF NOT EXISTS` without any `PARTITION`
specification.

## How was this patch tested?
Added test cases to `PlanParserSuite` and `InsertIntoHiveTableSuite`.

Author: gatorsmile 

Closes #13447 from gatorsmile/insertIfNotExist.

(cherry picked from commit e5d703bca85c65ce329b1e202283cfa35d109146)
Signed-off-by: Yin Huai 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b82abde0
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b82abde0
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b82abde0

Branch: refs/heads/branch-2.0
Commit: b82abde060d97bd95f4fba547545a830602a35fa
Parents: 3994372
Author: gatorsmile 
Authored: Thu Jun 16 22:54:02 2016 -0700
Committer: Yin Huai 
Committed: Thu Jun 16 22:54:17 2016 -0700

--
 .../apache/spark/sql/catalyst/parser/SqlBase.g4 |  2 +-
 .../spark/sql/catalyst/parser/AstBuilder.scala  |  6 ++
 .../plans/logical/basicLogicalOperators.scala   |  1 +
 .../sql/catalyst/parser/PlanParserSuite.scala   | 13 ++--
 .../sql/hive/InsertIntoHiveTableSuite.scala | 68 
 5 files changed, 85 insertions(+), 5 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/b82abde0/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
--
diff --git 
a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 
b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
index b603196..23e925e 100644
--- 
a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
+++ 
b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
@@ -203,7 +203,7 @@ query
 ;
 
 insertInto
-: INSERT OVERWRITE TABLE tableIdentifier partitionSpec? (IF NOT EXISTS)?
+: INSERT OVERWRITE TABLE tableIdentifier (partitionSpec (IF NOT EXISTS)?)?
 | INSERT INTO TABLE? tableIdentifier partitionSpec?
 ;
 

http://git-wip-us.apache.org/repos/asf/spark/blob/b82abde0/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
--
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
index e380643..c7420a1 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
@@ -171,6 +171,12 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with 
Logging {
 val tableIdent = visitTableIdentifier(ctx.tableIdentifier)
 val partitionKeys = 
Option(ctx.partitionSpec).map(visitPartitionSpec).getOrElse(Map.empty)
 
+val dynamicPartitionKeys = partitionKeys.filter(_._2.isEmpty)
+if (ctx.EXISTS != null && dynamicPartitionKeys.nonEmpty) {
+  throw new ParseException(s"Dynamic partitions do not support IF NOT 
EXISTS. Specified " +
+"partitions with value: " + dynamicPartitionKeys.keys.mkString("[", 
",", "]"), ctx)
+}
+
 InsertIntoTable(
   UnresolvedRelation(tableIdent, None),
   partitionKeys,

http://git-wip-us.apache.org/repos/asf/spark/blob/b82abde0/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala
--
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala
index 898784d..6c3eb3a 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala
@@ -377,6 +377,7 @@ case class InsertIntoTable(
   }
 
   assert(overwrite || !ifNotExists)
+  assert(partition.values.forall(_.nonEmpty) || !ifNotExists)
   override lazy val resolved: Boolean =
 childrenResolved && table.resolved && expectedColumns.forall { expected =>
 child.output.size == expected.size && child.output.zip(expect

spark git commit: [SPARK-15822] [SQL] Prevent byte array backed classes from referencing freed memory

2016-06-16 Thread davies
Repository: spark
Updated Branches:
  refs/heads/branch-2.0 f530331e6 -> 3994372f4


[SPARK-15822] [SQL] Prevent byte array backed classes from referencing freed 
memory

## What changes were proposed in this pull request?
`UTF8String` and all `Unsafe*` classes are backed by either on-heap or off-heap
byte arrays. The code-generated version of `SortMergeJoin` buffers the left-hand
side join keys during iteration. This was problematic in off-heap mode when one
of the keys was a `UTF8String` (or any other `Unsafe*` object) and the left-hand
side iterator was exhausted (and had released its memory); the buffered keys
would then reference freed memory. This causes segfaults and all kinds of other
undefined behavior when one of these buffered keys is used.

This PR fixes this problem by creating copies of the buffered variables. I have 
added a general method to the `CodeGenerator` for this. I have checked all 
places in which this could happen, and only `SortMergeJoin` had this problem.
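
To make the copy rule concrete, here is a small standalone Scala sketch (not the
patch itself) of the decision that `addBufferedState` encodes in the diff below:
byte-array backed values must be cloned or copied before being buffered, while
other values can be aliased directly.

```scala
import org.apache.spark.sql.types._

// Sketch of the initialization expression addBufferedState generates for a buffered
// variable; it only builds the Java snippet as a string.
def bufferInit(dataType: DataType, init: String): String = dataType match {
  case StringType                                => s"$init.clone()" // UTF8String: copy the bytes
  case _: StructType | _: ArrayType | _: MapType => s"$init.copy()"  // Unsafe row/array/map
  case _                                         => init             // primitives are safe to alias
}

// e.g. bufferInit(StringType, "leftKey") yields "leftKey.clone()"
```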

This PR is largely based on the work of robbinspg and he should be credited for 
this.

closes https://github.com/apache/spark/pull/13707

## How was this patch tested?
Manually tested on problematic workloads.

Author: Pete Robbins 
Author: Herman van Hovell 

Closes #13723 from hvanhovell/SPARK-15822-2.

(cherry picked from commit 5ada606144c7bf38a797764619d7d1ff677802b3)
Signed-off-by: Davies Liu 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3994372f
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3994372f
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3994372f

Branch: refs/heads/branch-2.0
Commit: 3994372f48eefe080ce7a80750ccf960e3a7968b
Parents: f530331
Author: Pete Robbins 
Authored: Thu Jun 16 22:27:32 2016 -0700
Committer: Davies Liu 
Committed: Thu Jun 16 22:27:43 2016 -0700

--
 .../expressions/codegen/CodeGenerator.scala | 16 
 .../sql/execution/joins/SortMergeJoinExec.scala |  8 +---
 2 files changed, 17 insertions(+), 7 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/3994372f/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
--
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
index ff97cd3..6392ff4 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
@@ -130,6 +130,22 @@ class CodegenContext {
 mutableStates += ((javaType, variableName, initCode))
   }
 
+  /**
+   * Add buffer variable which stores data coming from an [[InternalRow]]. 
This methods guarantees
+   * that the variable is safely stored, which is important for (potentially) 
byte array backed
+   * data types like: UTF8String, ArrayData, MapData & InternalRow.
+   */
+  def addBufferedState(dataType: DataType, variableName: String, initCode: 
String): ExprCode = {
+val value = freshName(variableName)
+addMutableState(javaType(dataType), value, "")
+val code = dataType match {
+  case StringType => s"$value = $initCode.clone();"
+  case _: StructType | _: ArrayType | _: MapType => s"$value = 
$initCode.copy();"
+  case _ => s"$value = $initCode;"
+}
+ExprCode(code, "false", value)
+  }
+
   def declareMutableStates(): String = {
 // It's possible that we add same mutable state twice, e.g. the 
`mergeExpressions` in
 // `TypedAggregateExpression`, we should call `distinct` here to remove 
the duplicated ones.

http://git-wip-us.apache.org/repos/asf/spark/blob/3994372f/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala
--
diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala
 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala
index 32f0bc5..fac6b8d 100644
--- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala
+++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala
@@ -336,13 +336,7 @@ case class SortMergeJoinExec(
 
   private def copyKeys(ctx: CodegenContext, vars: Seq[ExprCode]): 
Seq[ExprCode] = {
 vars.zipWithIndex.map { case (ev, i) =>
-  val value = ctx.freshName("value")
-  ctx.addMutableState(ctx.javaType(leftKeys(i).dataType), value, "")
-  val code =
-s"""
-   |$value = ${ev.value};
- """.st

spark git commit: [SPARK-15822] [SQL] Prevent byte array backed classes from referencing freed memory

2016-06-16 Thread davies
Repository: spark
Updated Branches:
  refs/heads/master 513a03e41 -> 5ada60614


[SPARK-15822] [SQL] Prevent byte array backed classes from referencing freed 
memory

## What changes were proposed in this pull request?
`UTF8String` and all `Unsafe*` classes are backed by either on-heap or off-heap
byte arrays. The code-generated version of `SortMergeJoin` buffers the left-hand
side join keys during iteration. This was problematic in off-heap mode when one
of the keys was a `UTF8String` (or any other `Unsafe*` object) and the left-hand
side iterator was exhausted (and had released its memory); the buffered keys
would then reference freed memory. This causes segfaults and all kinds of other
undefined behavior when one of these buffered keys is used.

This PR fixes this problem by creating copies of the buffered variables. I have 
added a general method to the `CodeGenerator` for this. I have checked all 
places in which this could happen, and only `SortMergeJoin` had this problem.

This PR is largely based on the work of robbinspg and he should be credited for 
this.

closes https://github.com/apache/spark/pull/13707

## How was this patch tested?
Manually tested on problematic workloads.

Author: Pete Robbins 
Author: Herman van Hovell 

Closes #13723 from hvanhovell/SPARK-15822-2.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5ada6061
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5ada6061
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5ada6061

Branch: refs/heads/master
Commit: 5ada606144c7bf38a797764619d7d1ff677802b3
Parents: 513a03e
Author: Pete Robbins 
Authored: Thu Jun 16 22:27:32 2016 -0700
Committer: Davies Liu 
Committed: Thu Jun 16 22:27:32 2016 -0700

--
 .../expressions/codegen/CodeGenerator.scala | 16 
 .../sql/execution/joins/SortMergeJoinExec.scala |  8 +---
 2 files changed, 17 insertions(+), 7 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/5ada6061/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
--
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
index ff97cd3..6392ff4 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
@@ -130,6 +130,22 @@ class CodegenContext {
 mutableStates += ((javaType, variableName, initCode))
   }
 
+  /**
+   * Add buffer variable which stores data coming from an [[InternalRow]]. 
This methods guarantees
+   * that the variable is safely stored, which is important for (potentially) 
byte array backed
+   * data types like: UTF8String, ArrayData, MapData & InternalRow.
+   */
+  def addBufferedState(dataType: DataType, variableName: String, initCode: 
String): ExprCode = {
+val value = freshName(variableName)
+addMutableState(javaType(dataType), value, "")
+val code = dataType match {
+  case StringType => s"$value = $initCode.clone();"
+  case _: StructType | _: ArrayType | _: MapType => s"$value = 
$initCode.copy();"
+  case _ => s"$value = $initCode;"
+}
+ExprCode(code, "false", value)
+  }
+
   def declareMutableStates(): String = {
 // It's possible that we add same mutable state twice, e.g. the 
`mergeExpressions` in
 // `TypedAggregateExpression`, we should call `distinct` here to remove 
the duplicated ones.

http://git-wip-us.apache.org/repos/asf/spark/blob/5ada6061/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala
--
diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala
 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala
index 32f0bc5..fac6b8d 100644
--- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala
+++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala
@@ -336,13 +336,7 @@ case class SortMergeJoinExec(
 
   private def copyKeys(ctx: CodegenContext, vars: Seq[ExprCode]): 
Seq[ExprCode] = {
 vars.zipWithIndex.map { case (ev, i) =>
-  val value = ctx.freshName("value")
-  ctx.addMutableState(ctx.javaType(leftKeys(i).dataType), value, "")
-  val code =
-s"""
-   |$value = ${ev.value};
- """.stripMargin
-  ExprCode(code, "false", value)
+  ctx.addBufferedState(leftKeys(i).dataType, "value"

spark git commit: [SPARK-15908][R] Add varargs-type dropDuplicates() function in SparkR

2016-06-16 Thread shivaram
Repository: spark
Updated Branches:
  refs/heads/branch-2.0 2127f99f2 -> f530331e6


[SPARK-15908][R] Add varargs-type dropDuplicates() function in SparkR

## What changes were proposed in this pull request?

This PR adds varargs-type `dropDuplicates` function to SparkR for API parity.
Refer to https://issues.apache.org/jira/browse/SPARK-15807, too.

## How was this patch tested?

Pass the Jenkins tests with new test cases.

Author: Dongjoon Hyun 

Closes #13684 from dongjoon-hyun/SPARK-15908.

(cherry picked from commit 513a03e41e27d9c5f70911faccc5d3aecd8bdde9)
Signed-off-by: Shivaram Venkataraman 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f530331e
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f530331e
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f530331e

Branch: refs/heads/branch-2.0
Commit: f530331e6f8160f3fb2613722fae01ea589f0e99
Parents: 2127f99
Author: Dongjoon Hyun 
Authored: Thu Jun 16 20:35:17 2016 -0700
Committer: Shivaram Venkataraman 
Committed: Thu Jun 16 20:35:25 2016 -0700

--
 R/pkg/R/DataFrame.R   | 25 +++--
 R/pkg/R/generics.R|  7 ++-
 R/pkg/inst/tests/testthat/test_sparkSQL.R |  8 
 3 files changed, 29 insertions(+), 11 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/f530331e/R/pkg/R/DataFrame.R
--
diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index d72cbbd..c710bff 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -1936,10 +1936,11 @@ setMethod("where",
 #' the subset of columns.
 #'
 #' @param x A SparkDataFrame.
-#' @param colnames A character vector of column names.
+#' @param ... A character vector of column names or string column names.
+#'If the first argument contains a character vector, the 
followings are ignored.
 #' @return A SparkDataFrame with duplicate rows removed.
 #' @family SparkDataFrame functions
-#' @rdname dropduplicates
+#' @rdname dropDuplicates
 #' @name dropDuplicates
 #' @export
 #' @examples
@@ -1949,14 +1950,26 @@ setMethod("where",
 #' path <- "path/to/file.json"
 #' df <- read.json(path)
 #' dropDuplicates(df)
+#' dropDuplicates(df, "col1", "col2")
 #' dropDuplicates(df, c("col1", "col2"))
 #' }
 setMethod("dropDuplicates",
   signature(x = "SparkDataFrame"),
-  function(x, colNames = columns(x)) {
-stopifnot(class(colNames) == "character")
-
-sdf <- callJMethod(x@sdf, "dropDuplicates", as.list(colNames))
+  function(x, ...) {
+cols <- list(...)
+if (length(cols) == 0) {
+  sdf <- callJMethod(x@sdf, "dropDuplicates", as.list(columns(x)))
+} else {
+  if (!all(sapply(cols, function(c) { is.character(c) }))) {
+stop("all columns names should be characters")
+  }
+  col <- cols[[1]]
+  if (length(col) > 1) {
+sdf <- callJMethod(x@sdf, "dropDuplicates", as.list(col))
+  } else {
+sdf <- callJMethod(x@sdf, "dropDuplicates", cols)
+  }
+}
 dataFrame(sdf)
   })
 

http://git-wip-us.apache.org/repos/asf/spark/blob/f530331e/R/pkg/R/generics.R
--
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index 40a96d8..8164e77 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -466,12 +466,9 @@ setGeneric("describe", function(x, col, ...) { 
standardGeneric("describe") })
 #' @export
 setGeneric("drop", function(x, ...) { standardGeneric("drop") })
 
-#' @rdname dropduplicates
+#' @rdname dropDuplicates
 #' @export
-setGeneric("dropDuplicates",
-   function(x, colNames = columns(x)) {
- standardGeneric("dropDuplicates")
-   })
+setGeneric("dropDuplicates", function(x, ...) { 
standardGeneric("dropDuplicates") })
 
 #' @rdname nafunctions
 #' @export

http://git-wip-us.apache.org/repos/asf/spark/blob/f530331e/R/pkg/inst/tests/testthat/test_sparkSQL.R
--
diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R 
b/R/pkg/inst/tests/testthat/test_sparkSQL.R
index c11930a..11d6936 100644
--- a/R/pkg/inst/tests/testthat/test_sparkSQL.R
+++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R
@@ -796,6 +796,14 @@ test_that("distinct(), unique() and dropDuplicates() on 
DataFrames", {
 result[order(result$key, result$value1, result$value2), ],
 expected)
 
+  result <- collect(dropDuplicates(df, "key", "value1"))
+  expected <- rbind.data.frame(
+c(1, 1, 1), c(1, 2, 1), c(2, 1, 2), c(2, 2, 2))
+  names(expected) <- c("key", "value1", "value2"

spark git commit: [SPARK-15908][R] Add varargs-type dropDuplicates() function in SparkR

2016-06-16 Thread shivaram
Repository: spark
Updated Branches:
  refs/heads/master 5fd20b66f -> 513a03e41


[SPARK-15908][R] Add varargs-type dropDuplicates() function in SparkR

## What changes were proposed in this pull request?

This PR adds varargs-type `dropDuplicates` function to SparkR for API parity.
Refer to https://issues.apache.org/jira/browse/SPARK-15807, too.

## How was this patch tested?

Pass the Jenkins tests with new test cases.

Author: Dongjoon Hyun 

Closes #13684 from dongjoon-hyun/SPARK-15908.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/513a03e4
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/513a03e4
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/513a03e4

Branch: refs/heads/master
Commit: 513a03e41e27d9c5f70911faccc5d3aecd8bdde9
Parents: 5fd20b6
Author: Dongjoon Hyun 
Authored: Thu Jun 16 20:35:17 2016 -0700
Committer: Shivaram Venkataraman 
Committed: Thu Jun 16 20:35:17 2016 -0700

--
 R/pkg/R/DataFrame.R   | 25 +++--
 R/pkg/R/generics.R|  7 ++-
 R/pkg/inst/tests/testthat/test_sparkSQL.R |  8 
 3 files changed, 29 insertions(+), 11 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/513a03e4/R/pkg/R/DataFrame.R
--
diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index d72cbbd..c710bff 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -1936,10 +1936,11 @@ setMethod("where",
 #' the subset of columns.
 #'
 #' @param x A SparkDataFrame.
-#' @param colnames A character vector of column names.
+#' @param ... A character vector of column names or string column names.
+#'If the first argument contains a character vector, the 
followings are ignored.
 #' @return A SparkDataFrame with duplicate rows removed.
 #' @family SparkDataFrame functions
-#' @rdname dropduplicates
+#' @rdname dropDuplicates
 #' @name dropDuplicates
 #' @export
 #' @examples
@@ -1949,14 +1950,26 @@ setMethod("where",
 #' path <- "path/to/file.json"
 #' df <- read.json(path)
 #' dropDuplicates(df)
+#' dropDuplicates(df, "col1", "col2")
 #' dropDuplicates(df, c("col1", "col2"))
 #' }
 setMethod("dropDuplicates",
   signature(x = "SparkDataFrame"),
-  function(x, colNames = columns(x)) {
-stopifnot(class(colNames) == "character")
-
-sdf <- callJMethod(x@sdf, "dropDuplicates", as.list(colNames))
+  function(x, ...) {
+cols <- list(...)
+if (length(cols) == 0) {
+  sdf <- callJMethod(x@sdf, "dropDuplicates", as.list(columns(x)))
+} else {
+  if (!all(sapply(cols, function(c) { is.character(c) }))) {
+stop("all columns names should be characters")
+  }
+  col <- cols[[1]]
+  if (length(col) > 1) {
+sdf <- callJMethod(x@sdf, "dropDuplicates", as.list(col))
+  } else {
+sdf <- callJMethod(x@sdf, "dropDuplicates", cols)
+  }
+}
 dataFrame(sdf)
   })
 

http://git-wip-us.apache.org/repos/asf/spark/blob/513a03e4/R/pkg/R/generics.R
--
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index 40a96d8..8164e77 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -466,12 +466,9 @@ setGeneric("describe", function(x, col, ...) { 
standardGeneric("describe") })
 #' @export
 setGeneric("drop", function(x, ...) { standardGeneric("drop") })
 
-#' @rdname dropduplicates
+#' @rdname dropDuplicates
 #' @export
-setGeneric("dropDuplicates",
-   function(x, colNames = columns(x)) {
- standardGeneric("dropDuplicates")
-   })
+setGeneric("dropDuplicates", function(x, ...) { 
standardGeneric("dropDuplicates") })
 
 #' @rdname nafunctions
 #' @export

http://git-wip-us.apache.org/repos/asf/spark/blob/513a03e4/R/pkg/inst/tests/testthat/test_sparkSQL.R
--
diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R 
b/R/pkg/inst/tests/testthat/test_sparkSQL.R
index c11930a..11d6936 100644
--- a/R/pkg/inst/tests/testthat/test_sparkSQL.R
+++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R
@@ -796,6 +796,14 @@ test_that("distinct(), unique() and dropDuplicates() on 
DataFrames", {
 result[order(result$key, result$value1, result$value2), ],
 expected)
 
+  result <- collect(dropDuplicates(df, "key", "value1"))
+  expected <- rbind.data.frame(
+c(1, 1, 1), c(1, 2, 1), c(2, 1, 2), c(2, 2, 2))
+  names(expected) <- c("key", "value1", "value2")
+  expect_equivalent(
+result[order(result$key, result$value1, result$value2), ],
+expected)
+
   result <

spark git commit: [SPARK-15490][R][DOC] SparkR 2.0 QA: New R APIs and API docs for non-MLib changes

2016-06-16 Thread jkbradley
Repository: spark
Updated Branches:
  refs/heads/branch-2.0 feaba979b -> 2127f99f2


[SPARK-15490][R][DOC] SparkR 2.0 QA: New R APIs and API docs for non-MLib 
changes

## What changes were proposed in this pull request?
R docs changes: fix typos, formatting, and layout.

## How was this patch tested?
Tested locally.

Author: Kai Jiang 

Closes #13394 from vectorijk/spark-15490.

(cherry picked from commit 5fd20b66ffe18c05cf257af7f30d32464d2fe8e7)
Signed-off-by: Joseph K. Bradley 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2127f99f
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2127f99f
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2127f99f

Branch: refs/heads/branch-2.0
Commit: 2127f99f2c8cf6d3f85e6408ce47b82e0c3cad4d
Parents: feaba97
Author: Kai Jiang 
Authored: Thu Jun 16 19:39:33 2016 -0700
Committer: Joseph K. Bradley 
Committed: Thu Jun 16 19:39:43 2016 -0700

--
 R/pkg/R/DataFrame.R  | 91 ++-
 R/pkg/R/RDD.R| 14 
 R/pkg/R/WindowSpec.R |  7 ++--
 R/pkg/R/broadcast.R  |  8 +++--
 R/pkg/R/column.R |  6 ++--
 R/pkg/R/context.R| 41 ++---
 R/pkg/R/functions.R  |  2 +-
 R/pkg/R/group.R  |  6 ++--
 R/pkg/R/mllib.R  | 34 --
 R/pkg/R/utils.R  |  2 ++
 10 files changed, 123 insertions(+), 88 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/2127f99f/R/pkg/R/DataFrame.R
--
diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index 9a9b3f7..d72cbbd 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -23,9 +23,11 @@ NULL
 setOldClass("jobj")
 setOldClass("structType")
 
-#' @title S4 class that represents a SparkDataFrame
-#' @description DataFrames can be created using functions like 
\link{createDataFrame},
-#'  \link{read.json}, \link{table} etc.
+#' S4 class that represents a SparkDataFrame
+#'
+#' DataFrames can be created using functions like \link{createDataFrame},
+#' \link{read.json}, \link{table} etc.
+#'
 #' @family SparkDataFrame functions
 #' @rdname SparkDataFrame
 #' @docType class
@@ -629,8 +631,6 @@ setMethod("repartition",
 #'
 #' @param x A SparkDataFrame
 #' @return A StringRRDD of JSON objects
-#' @family SparkDataFrame functions
-#' @rdname tojson
 #' @noRd
 #' @examples
 #'\dontrun{
@@ -648,7 +648,7 @@ setMethod("toJSON",
 RDD(jrdd, serializedMode = "string")
   })
 
-#' write.json
+#' Save the contents of SparkDataFrame as a JSON file
 #'
 #' Save the contents of a SparkDataFrame as a JSON file (one object per line). 
Files written out
 #' with this method can be read back in as a SparkDataFrame using read.json().
@@ -675,7 +675,7 @@ setMethod("write.json",
 invisible(callJMethod(write, "json", path))
   })
 
-#' write.parquet
+#' Save the contents of SparkDataFrame as a Parquet file, preserving the 
schema.
 #'
 #' Save the contents of a SparkDataFrame as a Parquet file, preserving the 
schema. Files written out
 #' with this method can be read back in as a SparkDataFrame using 
read.parquet().
@@ -713,9 +713,9 @@ setMethod("saveAsParquetFile",
 write.parquet(x, path)
   })
 
-#' write.text
+#' Save the content of SparkDataFrame in a text file at the specified path.
 #'
-#' Saves the content of the SparkDataFrame in a text file at the specified 
path.
+#' Save the content of the SparkDataFrame in a text file at the specified path.
 #' The SparkDataFrame must have only one column of string type with the name 
"value".
 #' Each row becomes a new line in the output file.
 #'
@@ -820,8 +820,6 @@ setMethod("sample_frac",
 sample(x, withReplacement, fraction, seed)
   })
 
-#' nrow
-#'
 #' Returns the number of rows in a SparkDataFrame
 #'
 #' @param x A SparkDataFrame
@@ -874,6 +872,8 @@ setMethod("ncol",
 length(columns(x))
   })
 
+#' Returns the dimensions of SparkDataFrame
+#'
 #' Returns the dimensions (number of rows and columns) of a SparkDataFrame
 #' @param x a SparkDataFrame
 #'
@@ -2012,8 +2012,9 @@ setMethod("join",
 dataFrame(sdf)
   })
 
+#' Merges two data frames
+#'
 #' @name merge
-#' @title Merges two data frames
 #' @param x the first data frame to be joined
 #' @param y the second data frame to be joined
 #' @param by a character vector specifying the join columns. If by is not
@@ -2127,7 +2128,6 @@ setMethod("merge",
 joinRes
   })
 
-#'
 #' Creates a list of columns by replacing the intersected ones with aliases.
 #' The name of the alias column is formed by concatanating the original column 
name and a suffix.
 #'
@@ -2182,8 +2182,9 @@ setMethod("unionAll",
 dataFrame(unioned)
   

spark git commit: [SPARK-15490][R][DOC] SparkR 2.0 QA: New R APIs and API docs for non-MLib changes

2016-06-16 Thread jkbradley
Repository: spark
Updated Branches:
  refs/heads/master 63470afc9 -> 5fd20b66f


[SPARK-15490][R][DOC] SparkR 2.0 QA: New R APIs and API docs for non-MLib 
changes

## What changes were proposed in this pull request?
R docs changes: fix typos, formatting, and layout.

## How was this patch tested?
Tested locally.

Author: Kai Jiang 

Closes #13394 from vectorijk/spark-15490.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5fd20b66
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5fd20b66
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5fd20b66

Branch: refs/heads/master
Commit: 5fd20b66ffe18c05cf257af7f30d32464d2fe8e7
Parents: 63470af
Author: Kai Jiang 
Authored: Thu Jun 16 19:39:33 2016 -0700
Committer: Joseph K. Bradley 
Committed: Thu Jun 16 19:39:33 2016 -0700

--
 R/pkg/R/DataFrame.R  | 91 ++-
 R/pkg/R/RDD.R| 14 
 R/pkg/R/WindowSpec.R |  7 ++--
 R/pkg/R/broadcast.R  |  8 +++--
 R/pkg/R/column.R |  6 ++--
 R/pkg/R/context.R| 41 ++---
 R/pkg/R/functions.R  |  2 +-
 R/pkg/R/group.R  |  6 ++--
 R/pkg/R/mllib.R  | 34 --
 R/pkg/R/utils.R  |  2 ++
 10 files changed, 123 insertions(+), 88 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/5fd20b66/R/pkg/R/DataFrame.R
--
diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index 9a9b3f7..d72cbbd 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -23,9 +23,11 @@ NULL
 setOldClass("jobj")
 setOldClass("structType")
 
-#' @title S4 class that represents a SparkDataFrame
-#' @description DataFrames can be created using functions like 
\link{createDataFrame},
-#'  \link{read.json}, \link{table} etc.
+#' S4 class that represents a SparkDataFrame
+#'
+#' DataFrames can be created using functions like \link{createDataFrame},
+#' \link{read.json}, \link{table} etc.
+#'
 #' @family SparkDataFrame functions
 #' @rdname SparkDataFrame
 #' @docType class
@@ -629,8 +631,6 @@ setMethod("repartition",
 #'
 #' @param x A SparkDataFrame
 #' @return A StringRRDD of JSON objects
-#' @family SparkDataFrame functions
-#' @rdname tojson
 #' @noRd
 #' @examples
 #'\dontrun{
@@ -648,7 +648,7 @@ setMethod("toJSON",
 RDD(jrdd, serializedMode = "string")
   })
 
-#' write.json
+#' Save the contents of SparkDataFrame as a JSON file
 #'
 #' Save the contents of a SparkDataFrame as a JSON file (one object per line). 
Files written out
 #' with this method can be read back in as a SparkDataFrame using read.json().
@@ -675,7 +675,7 @@ setMethod("write.json",
 invisible(callJMethod(write, "json", path))
   })
 
-#' write.parquet
+#' Save the contents of SparkDataFrame as a Parquet file, preserving the 
schema.
 #'
 #' Save the contents of a SparkDataFrame as a Parquet file, preserving the 
schema. Files written out
 #' with this method can be read back in as a SparkDataFrame using 
read.parquet().
@@ -713,9 +713,9 @@ setMethod("saveAsParquetFile",
 write.parquet(x, path)
   })
 
-#' write.text
+#' Save the content of SparkDataFrame in a text file at the specified path.
 #'
-#' Saves the content of the SparkDataFrame in a text file at the specified 
path.
+#' Save the content of the SparkDataFrame in a text file at the specified path.
 #' The SparkDataFrame must have only one column of string type with the name 
"value".
 #' Each row becomes a new line in the output file.
 #'
@@ -820,8 +820,6 @@ setMethod("sample_frac",
 sample(x, withReplacement, fraction, seed)
   })
 
-#' nrow
-#'
 #' Returns the number of rows in a SparkDataFrame
 #'
 #' @param x A SparkDataFrame
@@ -874,6 +872,8 @@ setMethod("ncol",
 length(columns(x))
   })
 
+#' Returns the dimensions of SparkDataFrame
+#'
 #' Returns the dimensions (number of rows and columns) of a SparkDataFrame
 #' @param x a SparkDataFrame
 #'
@@ -2012,8 +2012,9 @@ setMethod("join",
 dataFrame(sdf)
   })
 
+#' Merges two data frames
+#'
 #' @name merge
-#' @title Merges two data frames
 #' @param x the first data frame to be joined
 #' @param y the second data frame to be joined
 #' @param by a character vector specifying the join columns. If by is not
@@ -2127,7 +2128,6 @@ setMethod("merge",
 joinRes
   })
 
-#'
 #' Creates a list of columns by replacing the intersected ones with aliases.
 #' The name of the alias column is formed by concatanating the original column 
name and a suffix.
 #'
@@ -2182,8 +2182,9 @@ setMethod("unionAll",
 dataFrame(unioned)
   })
 
-#' @title Union two or more SparkDataFrames
-#' @description Returns a new SparkDataFrame containing r

spark git commit: [SPARK-15782][YARN] Fix spark.jars and spark.yarn.dist.jars handling

2016-06-16 Thread vanzin
Repository: spark
Updated Branches:
  refs/heads/master f1bf0d2f3 -> 63470afc9


[SPARK-15782][YARN] Fix spark.jars and spark.yarn.dist.jars handling

When `--packages` is specified with spark-shell, the classes from those packages
cannot be found, which I think is due to some of the changes in SPARK-12343.
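
As a hedged illustration of the resulting jar handling (paths are made up, and
`Utils` is Spark-internal, so this only compiles inside Spark's own packages or
tests; see the `Utils` changes in the diff below):

```scala
import org.apache.spark.SparkConf
import org.apache.spark.util.Utils

// In YARN mode the user jars are now the union of spark.jars and spark.yarn.dist.jars
// (duplicates and empty entries dropped); in other modes only spark.jars is used.
val conf = new SparkConf()
  .set("spark.master", "yarn")
  .set("spark.jars", "/tmp/a.jar,/tmp/b.jar")
  .set("spark.yarn.dist.jars", "/tmp/c.jar,/tmp/a.jar")

// Expected to contain exactly /tmp/a.jar, /tmp/b.jar and /tmp/c.jar.
val userJars = Utils.getUserJars(conf)
```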

Tested manually with both the Scala 2.10 and 2.11 REPLs.

vanzin davies can you guys please review?

Author: Marcelo Vanzin 
Author: Nezih Yigitbasi 

Closes #13709 from nezihyigitbasi/SPARK-15782.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/63470afc
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/63470afc
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/63470afc

Branch: refs/heads/master
Commit: 63470afc997fb9d6b6f8a911c25964743556c9cc
Parents: f1bf0d2
Author: Nezih Yigitbasi 
Authored: Thu Jun 16 18:19:29 2016 -0700
Committer: Marcelo Vanzin 
Committed: Thu Jun 16 18:20:16 2016 -0700

--
 .../scala/org/apache/spark/SparkContext.scala   |  2 +-
 .../scala/org/apache/spark/util/Utils.scala | 25 +++
 .../apache/spark/deploy/SparkSubmitSuite.scala  | 12 
 .../org/apache/spark/repl/SparkILoop.scala  | 32 
 .../main/scala/org/apache/spark/repl/Main.scala |  4 +--
 5 files changed, 59 insertions(+), 16 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/63470afc/core/src/main/scala/org/apache/spark/SparkContext.scala
--
diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala 
b/core/src/main/scala/org/apache/spark/SparkContext.scala
index d56946e..d870181 100644
--- a/core/src/main/scala/org/apache/spark/SparkContext.scala
+++ b/core/src/main/scala/org/apache/spark/SparkContext.scala
@@ -391,7 +391,7 @@ class SparkContext(config: SparkConf) extends Logging with 
ExecutorAllocationCli
 
 _conf.set("spark.executor.id", SparkContext.DRIVER_IDENTIFIER)
 
-_jars = 
_conf.getOption("spark.jars").map(_.split(",")).map(_.filter(_.nonEmpty)).toSeq.flatten
+_jars = Utils.getUserJars(_conf)
 _files = 
_conf.getOption("spark.files").map(_.split(",")).map(_.filter(_.nonEmpty))
   .toSeq.flatten
 

http://git-wip-us.apache.org/repos/asf/spark/blob/63470afc/core/src/main/scala/org/apache/spark/util/Utils.scala
--
diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala 
b/core/src/main/scala/org/apache/spark/util/Utils.scala
index f9d0540..17d193b 100644
--- a/core/src/main/scala/org/apache/spark/util/Utils.scala
+++ b/core/src/main/scala/org/apache/spark/util/Utils.scala
@@ -2352,6 +2352,31 @@ private[spark] object Utils extends Logging {
 log.info(s"Started daemon with process name: ${Utils.getProcessName()}")
 SignalUtils.registerLogger(log)
   }
+
+  /**
+   * Unions two comma-separated lists of files and filters out empty strings.
+   */
+  def unionFileLists(leftList: Option[String], rightList: Option[String]): 
Set[String] = {
+var allFiles = Set[String]()
+leftList.foreach { value => allFiles ++= value.split(",") }
+rightList.foreach { value => allFiles ++= value.split(",") }
+allFiles.filter { _.nonEmpty }
+  }
+
+  /**
+   * In YARN mode this method returns a union of the jar files pointed by 
"spark.jars" and the
+   * "spark.yarn.dist.jars" properties, while in other modes it returns the 
jar files pointed by
+   * only the "spark.jars" property.
+   */
+  def getUserJars(conf: SparkConf): Seq[String] = {
+val sparkJars = conf.getOption("spark.jars")
+if (conf.get("spark.master") == "yarn") {
+  val yarnJars = conf.getOption("spark.yarn.dist.jars")
+  unionFileLists(sparkJars, yarnJars).toSeq
+} else {
+  sparkJars.map(_.split(",")).map(_.filter(_.nonEmpty)).toSeq.flatten
+}
+  }
 }
 
 /**

http://git-wip-us.apache.org/repos/asf/spark/blob/63470afc/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala
--
diff --git a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala 
b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala
index 2718976..0b02059 100644
--- a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala
+++ b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala
@@ -570,6 +570,18 @@ class SparkSubmitSuite
   appArgs.executorMemory should be ("2.3g")
 }
   }
+
+  test("comma separated list of files are unioned correctly") {
+val left = Option("/tmp/a.jar,/tmp/b.jar")
+val right = Option("/tmp/c.jar,/tmp/a.jar")
+val emptyString = Option("")
+Utils.unionFileLists(left, right) should be (Set("/tmp/a.jar", 
"/tmp/b.jar", "/tmp/c.jar"))
+  

spark git commit: [SPARK-15782][YARN] Fix spark.jars and spark.yarn.dist.jars handling

2016-06-16 Thread vanzin
Repository: spark
Updated Branches:
  refs/heads/branch-2.0 68e7a25cc -> feaba979b


[SPARK-15782][YARN] Fix spark.jars and spark.yarn.dist.jars handling

When `--packages` is specified with spark-shell, the classes from those packages
cannot be found, which I think is due to some of the changes in SPARK-12343.

Tested manually with both the Scala 2.10 and 2.11 REPLs.

vanzin davies can you guys please review?

Author: Marcelo Vanzin 
Author: Nezih Yigitbasi 

Closes #13709 from nezihyigitbasi/SPARK-15782.

(cherry picked from commit 63470afc997fb9d6b6f8a911c25964743556c9cc)
Signed-off-by: Marcelo Vanzin 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/feaba979
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/feaba979
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/feaba979

Branch: refs/heads/branch-2.0
Commit: feaba979b30b27f661ae44ae3f12eabc3a6e55b3
Parents: 68e7a25
Author: Nezih Yigitbasi 
Authored: Thu Jun 16 18:19:29 2016 -0700
Committer: Marcelo Vanzin 
Committed: Thu Jun 16 18:20:42 2016 -0700

--
 .../scala/org/apache/spark/SparkContext.scala   |  2 +-
 .../scala/org/apache/spark/util/Utils.scala | 25 +++
 .../apache/spark/deploy/SparkSubmitSuite.scala  | 12 
 .../org/apache/spark/repl/SparkILoop.scala  | 32 
 .../main/scala/org/apache/spark/repl/Main.scala |  4 +--
 5 files changed, 59 insertions(+), 16 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/feaba979/core/src/main/scala/org/apache/spark/SparkContext.scala
--
diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala 
b/core/src/main/scala/org/apache/spark/SparkContext.scala
index d56946e..d870181 100644
--- a/core/src/main/scala/org/apache/spark/SparkContext.scala
+++ b/core/src/main/scala/org/apache/spark/SparkContext.scala
@@ -391,7 +391,7 @@ class SparkContext(config: SparkConf) extends Logging with 
ExecutorAllocationCli
 
 _conf.set("spark.executor.id", SparkContext.DRIVER_IDENTIFIER)
 
-_jars = 
_conf.getOption("spark.jars").map(_.split(",")).map(_.filter(_.nonEmpty)).toSeq.flatten
+_jars = Utils.getUserJars(_conf)
 _files = 
_conf.getOption("spark.files").map(_.split(",")).map(_.filter(_.nonEmpty))
   .toSeq.flatten
 

http://git-wip-us.apache.org/repos/asf/spark/blob/feaba979/core/src/main/scala/org/apache/spark/util/Utils.scala
--
diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala 
b/core/src/main/scala/org/apache/spark/util/Utils.scala
index f9d0540..17d193b 100644
--- a/core/src/main/scala/org/apache/spark/util/Utils.scala
+++ b/core/src/main/scala/org/apache/spark/util/Utils.scala
@@ -2352,6 +2352,31 @@ private[spark] object Utils extends Logging {
 log.info(s"Started daemon with process name: ${Utils.getProcessName()}")
 SignalUtils.registerLogger(log)
   }
+
+  /**
+   * Unions two comma-separated lists of files and filters out empty strings.
+   */
+  def unionFileLists(leftList: Option[String], rightList: Option[String]): 
Set[String] = {
+var allFiles = Set[String]()
+leftList.foreach { value => allFiles ++= value.split(",") }
+rightList.foreach { value => allFiles ++= value.split(",") }
+allFiles.filter { _.nonEmpty }
+  }
+
+  /**
+   * In YARN mode this method returns a union of the jar files pointed by 
"spark.jars" and the
+   * "spark.yarn.dist.jars" properties, while in other modes it returns the 
jar files pointed by
+   * only the "spark.jars" property.
+   */
+  def getUserJars(conf: SparkConf): Seq[String] = {
+val sparkJars = conf.getOption("spark.jars")
+if (conf.get("spark.master") == "yarn") {
+  val yarnJars = conf.getOption("spark.yarn.dist.jars")
+  unionFileLists(sparkJars, yarnJars).toSeq
+} else {
+  sparkJars.map(_.split(",")).map(_.filter(_.nonEmpty)).toSeq.flatten
+}
+  }
 }
 
 /**

http://git-wip-us.apache.org/repos/asf/spark/blob/feaba979/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala
--
diff --git a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala 
b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala
index 2718976..0b02059 100644
--- a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala
+++ b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala
@@ -570,6 +570,18 @@ class SparkSubmitSuite
   appArgs.executorMemory should be ("2.3g")
 }
   }
+
+  test("comma separated list of files are unioned correctly") {
+val left = Option("/tmp/a.jar,/tmp/b.jar")
+val right = Option("/tmp/c.jar,/tmp/a.jar")
+val emptyString = Opt

spark git commit: [SPARK-15966][DOC] Add closing tag to fix rendering issue for Spark monitoring

2016-06-16 Thread andrewor14
Repository: spark
Updated Branches:
  refs/heads/master 9040d83bc -> f1bf0d2f3


[SPARK-15966][DOC] Add closing tag to fix rendering issue for Spark monitoring

## What changes were proposed in this pull request?
Adds the missing closing tag for spark.ui.view.acls.groups

## How was this patch tested?
I built the docs locally and verified the changed in browser.

(If this patch involves UI changes, please attach a screenshot; otherwise, 
remove this)
**Before:**
![image](https://cloud.githubusercontent.com/assets/7732317/16135005/49fc0724-33e6-11e6-9390-98711593fa5b.png)

**After:**
![image](https://cloud.githubusercontent.com/assets/7732317/16135021/62b5c4a8-33e6-11e6-8118-b22fda5c66eb.png)

Author: Dhruve Ashar 

Closes #13719 from dhruve/doc/SPARK-15966.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f1bf0d2f
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f1bf0d2f
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f1bf0d2f

Branch: refs/heads/master
Commit: f1bf0d2f3a61d81686f36763e83d3be89c98435f
Parents: 9040d83
Author: Dhruve Ashar 
Authored: Thu Jun 16 16:44:54 2016 -0700
Committer: Andrew Or 
Committed: Thu Jun 16 17:46:19 2016 -0700

--
 docs/monitoring.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/f1bf0d2f/docs/monitoring.md
--
diff --git a/docs/monitoring.md b/docs/monitoring.md
index 78a3470..fa6c899 100644
--- a/docs/monitoring.md
+++ b/docs/monitoring.md
@@ -157,7 +157,7 @@ The history server can be configured as follows:
   If enabled, access control checks are made regardless of what the 
individual application had
   set for spark.ui.acls.enable when the application was run. 
The application owner
   will always have authorization to view their own application and any 
users specified via
-  spark.ui.view.acls and groups specified via 
spark.ui.view.acls.groups
+  spark.ui.view.acls and groups specified via 
spark.ui.view.acls.groups
   when the application was run will also have authorization to view that 
application.
   If disabled, no access control checks are made.
 


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org



spark git commit: [SPARK-15608][ML][EXAMPLES][DOC] add examples and documents of ml.isotonic regression

2016-06-16 Thread yliang
Repository: spark
Updated Branches:
  refs/heads/branch-2.0 b3678eb7e -> 68e7a25cc


[SPARK-15608][ML][EXAMPLES][DOC] add examples and documents of ml.isotonic 
regression

## What changes were proposed in this pull request?

add ml doc for ml isotonic regression
add scala example for ml isotonic regression
add java example for ml isotonic regression
add python example for ml isotonic regression

modify scala example for mllib isotonic regression
modify java example for mllib isotonic regression
modify python example for mllib isotonic regression

add data/mllib/sample_isotonic_regression_libsvm_data.txt
delete data/mllib/sample_isotonic_regression_data.txt
## How was this patch tested?

N/A

Author: WeichenXu 

Closes #13381 from WeichenXu123/add_isotonic_regression_doc.

(cherry picked from commit 9040d83bc2cdce06dab0e1bdee4f796da9a9a55c)
Signed-off-by: Yanbo Liang 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/68e7a25c
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/68e7a25c
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/68e7a25c

Branch: refs/heads/branch-2.0
Commit: 68e7a25cc06cbfe357be8d224c117abaa7ba94f4
Parents: b3678eb
Author: WeichenXu 
Authored: Thu Jun 16 17:35:40 2016 -0700
Committer: Yanbo Liang 
Committed: Thu Jun 16 17:35:51 2016 -0700

--
 data/mllib/sample_isotonic_regression_data.txt  | 100 ---
 .../sample_isotonic_regression_libsvm_data.txt  | 100 +++
 docs/ml-classification-regression.md|  70 +
 .../ml/JavaIsotonicRegressionExample.java   |  62 
 .../mllib/JavaIsotonicRegressionExample.java|  19 ++--
 .../python/ml/isotonic_regression_example.py|  54 ++
 .../python/mllib/isotonic_regression_example.py |  11 +-
 .../examples/ml/IsotonicRegressionExample.scala |  62 
 .../mllib/IsotonicRegressionExample.scala   |   9 +-
 9 files changed, 373 insertions(+), 114 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/68e7a25c/data/mllib/sample_isotonic_regression_data.txt
--
diff --git a/data/mllib/sample_isotonic_regression_data.txt 
b/data/mllib/sample_isotonic_regression_data.txt
deleted file mode 100644
index d257b50..000
--- a/data/mllib/sample_isotonic_regression_data.txt
+++ /dev/null
@@ -1,100 +0,0 @@
-0.24579296,0.01
-0.28505864,0.02
-0.31208567,0.03
-0.35900051,0.04
-0.35747068,0.05
-0.16675166,0.06
-0.17491076,0.07
-0.04181540,0.08
-0.04793473,0.09
-0.03926568,0.10
-0.12952575,0.11
-0.,0.12
-0.01376849,0.13
-0.13105558,0.14
-0.08873024,0.15
-0.12595614,0.16
-0.15247323,0.17
-0.25956145,0.18
-0.20040796,0.19
-0.19581846,0.20
-0.15757267,0.21
-0.13717491,0.22
-0.19020908,0.23
-0.19581846,0.24
-0.20091790,0.25
-0.16879143,0.26
-0.18510964,0.27
-0.20040796,0.28
-0.29576747,0.29
-0.43396226,0.30
-0.53391127,0.31
-0.52116267,0.32
-0.48546660,0.33
-0.49209587,0.34
-0.54156043,0.35
-0.59765426,0.36
-0.56144824,0.37
-0.58592555,0.38
-0.52983172,0.39
-0.50178480,0.40
-0.52626211,0.41
-0.58286588,0.42
-0.64660887,0.43
-0.68077511,0.44
-0.74298827,0.45
-0.64864865,0.46
-0.67261601,0.47
-0.65782764,0.48
-0.69811321,0.49
-0.63029067,0.50
-0.61601224,0.51
-0.63233044,0.52
-0.65323814,0.53
-0.65323814,0.54
-0.67363590,0.55
-0.67006629,0.56
-0.51555329,0.57
-0.50892402,0.58
-0.33299337,0.59
-0.36206017,0.60
-0.43090260,0.61
-0.45996940,0.62
-0.56348802,0.63
-0.54920959,0.64
-0.48393677,0.65
-0.48495665,0.66
-0.46965834,0.67
-0.45181030,0.68
-0.45843957,0.69
-0.47118817,0.70
-0.51555329,0.71
-0.58031617,0.72
-0.55481897,0.73
-0.56297807,0.74
-0.56603774,0.75
-0.57929628,0.76
-0.64762876,0.77
-0.66241713,0.78
-0.69301377,0.79
-0.65119837,0.80
-0.68332483,0.81
-0.66598674,0.82
-0.73890872,0.83
-0.73992861,0.84
-0.84242733,0.85
-0.91330954,0.86
-0.88016318,0.87
-0.90719021,0.88
-0.93115757,0.89
-0.93115757,0.90
-0.91942886,0.91
-0.92911780,0.92
-0.95665477,0.93
-0.95002550,0.94
-0.96940337,0.95
-1.,0.96
-0.89801122,0.97
-0.90311066,0.98
-0.90362060,0.99
-0.83477817,1.0
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/spark/blob/68e7a25c/data/mllib/sample_isotonic_regression_libsvm_data.txt
--
diff --git a/data/mllib/sample_isotonic_regression_libsvm_data.txt 
b/data/mllib/sample_isotonic_regression_libsvm_data.txt
new file mode 100644
index 000..f39fe02
--- /dev/null
+++ b/data/mllib/sample_isotonic_regression_libsvm_data.txt
@@ -0,0 +1,100 @@
+0.24579296 1:0.01
+0.28505864 1:0.02
+0.31208567 1:0.03
+0.35900051 1:0.04
+0.35747068 1:0.05
+0.16675166 1:0.06
+0.17491076 1:0.07
+0.04181540 1:0.08
+0.04793473 1:0.09
+0.03926568 1:0.10
+0.12952575 1:0.11
+0. 1:0.12
+0.01376849 1:0.13

spark git commit: [SPARK-15608][ML][EXAMPLES][DOC] add examples and documents of ml.isotonic regression

2016-06-16 Thread yliang
Repository: spark
Updated Branches:
  refs/heads/master d9c6628c4 -> 9040d83bc


[SPARK-15608][ML][EXAMPLES][DOC] add examples and documents of ml.isotonic 
regression

## What changes were proposed in this pull request?

add ml doc for ml isotonic regression
add scala example for ml isotonic regression
add java example for ml isotonic regression
add python example for ml isotonic regression

modify scala example for mllib isotonic regression
modify java example for mllib isotonic regression
modify python example for mllib isotonic regression

add data/mllib/sample_isotonic_regression_libsvm_data.txt
delete data/mllib/sample_isotonic_regression_data.txt
## How was this patch tested?

N/A

Author: WeichenXu 

Closes #13381 from WeichenXu123/add_isotonic_regression_doc.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/9040d83b
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/9040d83b
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/9040d83b

Branch: refs/heads/master
Commit: 9040d83bc2cdce06dab0e1bdee4f796da9a9a55c
Parents: d9c6628
Author: WeichenXu 
Authored: Thu Jun 16 17:35:40 2016 -0700
Committer: Yanbo Liang 
Committed: Thu Jun 16 17:35:40 2016 -0700

--
 data/mllib/sample_isotonic_regression_data.txt  | 100 ---
 .../sample_isotonic_regression_libsvm_data.txt  | 100 +++
 docs/ml-classification-regression.md|  70 +
 .../ml/JavaIsotonicRegressionExample.java   |  62 
 .../mllib/JavaIsotonicRegressionExample.java|  19 ++--
 .../python/ml/isotonic_regression_example.py|  54 ++
 .../python/mllib/isotonic_regression_example.py |  11 +-
 .../examples/ml/IsotonicRegressionExample.scala |  62 
 .../mllib/IsotonicRegressionExample.scala   |   9 +-
 9 files changed, 373 insertions(+), 114 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/9040d83b/data/mllib/sample_isotonic_regression_data.txt
--
diff --git a/data/mllib/sample_isotonic_regression_data.txt 
b/data/mllib/sample_isotonic_regression_data.txt
deleted file mode 100644
index d257b50..000
--- a/data/mllib/sample_isotonic_regression_data.txt
+++ /dev/null
@@ -1,100 +0,0 @@
-0.24579296,0.01
-0.28505864,0.02
-0.31208567,0.03
-0.35900051,0.04
-0.35747068,0.05
-0.16675166,0.06
-0.17491076,0.07
-0.04181540,0.08
-0.04793473,0.09
-0.03926568,0.10
-0.12952575,0.11
-0.,0.12
-0.01376849,0.13
-0.13105558,0.14
-0.08873024,0.15
-0.12595614,0.16
-0.15247323,0.17
-0.25956145,0.18
-0.20040796,0.19
-0.19581846,0.20
-0.15757267,0.21
-0.13717491,0.22
-0.19020908,0.23
-0.19581846,0.24
-0.20091790,0.25
-0.16879143,0.26
-0.18510964,0.27
-0.20040796,0.28
-0.29576747,0.29
-0.43396226,0.30
-0.53391127,0.31
-0.52116267,0.32
-0.48546660,0.33
-0.49209587,0.34
-0.54156043,0.35
-0.59765426,0.36
-0.56144824,0.37
-0.58592555,0.38
-0.52983172,0.39
-0.50178480,0.40
-0.52626211,0.41
-0.58286588,0.42
-0.64660887,0.43
-0.68077511,0.44
-0.74298827,0.45
-0.64864865,0.46
-0.67261601,0.47
-0.65782764,0.48
-0.69811321,0.49
-0.63029067,0.50
-0.61601224,0.51
-0.63233044,0.52
-0.65323814,0.53
-0.65323814,0.54
-0.67363590,0.55
-0.67006629,0.56
-0.51555329,0.57
-0.50892402,0.58
-0.33299337,0.59
-0.36206017,0.60
-0.43090260,0.61
-0.45996940,0.62
-0.56348802,0.63
-0.54920959,0.64
-0.48393677,0.65
-0.48495665,0.66
-0.46965834,0.67
-0.45181030,0.68
-0.45843957,0.69
-0.47118817,0.70
-0.51555329,0.71
-0.58031617,0.72
-0.55481897,0.73
-0.56297807,0.74
-0.56603774,0.75
-0.57929628,0.76
-0.64762876,0.77
-0.66241713,0.78
-0.69301377,0.79
-0.65119837,0.80
-0.68332483,0.81
-0.66598674,0.82
-0.73890872,0.83
-0.73992861,0.84
-0.84242733,0.85
-0.91330954,0.86
-0.88016318,0.87
-0.90719021,0.88
-0.93115757,0.89
-0.93115757,0.90
-0.91942886,0.91
-0.92911780,0.92
-0.95665477,0.93
-0.95002550,0.94
-0.96940337,0.95
-1.,0.96
-0.89801122,0.97
-0.90311066,0.98
-0.90362060,0.99
-0.83477817,1.0
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/spark/blob/9040d83b/data/mllib/sample_isotonic_regression_libsvm_data.txt
--
diff --git a/data/mllib/sample_isotonic_regression_libsvm_data.txt 
b/data/mllib/sample_isotonic_regression_libsvm_data.txt
new file mode 100644
index 000..f39fe02
--- /dev/null
+++ b/data/mllib/sample_isotonic_regression_libsvm_data.txt
@@ -0,0 +1,100 @@
+0.24579296 1:0.01
+0.28505864 1:0.02
+0.31208567 1:0.03
+0.35900051 1:0.04
+0.35747068 1:0.05
+0.16675166 1:0.06
+0.17491076 1:0.07
+0.04181540 1:0.08
+0.04793473 1:0.09
+0.03926568 1:0.10
+0.12952575 1:0.11
+0. 1:0.12
+0.01376849 1:0.13
+0.13105558 1:0.14
+0.08873024 1:0.15
+0.12595614 1:0.16
+0.15247323 1:0.17
+0.25956145 1:0.18
+0.20040796

spark git commit: [SPARK-15991] SparkContext.hadoopConfiguration should be always the base of hadoop conf created by SessionState

2016-06-16 Thread zsxwing
Repository: spark
Updated Branches:
  refs/heads/branch-2.0 8f7138859 -> b3678eb7e


[SPARK-15991] SparkContext.hadoopConfiguration should be always the base of 
hadoop conf created by SessionState

## What changes were proposed in this pull request?
Before this patch, once a SparkSession had been created, Hadoop configuration values set
directly on SparkContext.hadoopConfiguration did not affect the Hadoop conf
created by SessionState. This patch always uses
SparkContext.hadoopConfiguration as the base.

This patch also changes the behavior of the hive-site.xml support added in
https://github.com/apache/spark/pull/12689/: with this patch, hive-site.xml is loaded
into SparkContext.hadoopConfiguration.

## How was this patch tested?
New test in SparkSessionBuilderSuite.

Author: Yin Huai 

Closes #13711 from yhuai/SPARK-15991.

(cherry picked from commit d9c6628c47de547dc537310e3c775c7f3e0e4a12)
Signed-off-by: Shixiong Zhu 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b3678eb7
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b3678eb7
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b3678eb7

Branch: refs/heads/branch-2.0
Commit: b3678eb7e4ac6bb08ba8579867944ba42da99b81
Parents: 8f71388
Author: Yin Huai 
Authored: Thu Jun 16 17:06:24 2016 -0700
Committer: Shixiong Zhu 
Committed: Thu Jun 16 17:06:30 2016 -0700

--
 .../spark/sql/internal/SessionState.scala   |  2 +-
 .../apache/spark/sql/internal/SharedState.scala | 14 --
 .../org/apache/spark/sql/SQLQuerySuite.scala|  4 
 .../spark/sql/SparkSessionBuilderSuite.scala| 20 
 .../apache/spark/sql/hive/HiveSharedState.scala |  5 +++--
 5 files changed, 28 insertions(+), 17 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/b3678eb7/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala
--
diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala
index 59efa81..dc95123 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala
@@ -49,7 +49,7 @@ private[sql] class SessionState(sparkSession: SparkSession) {
   lazy val conf: SQLConf = new SQLConf
 
   def newHadoopConf(): Configuration = {
-val hadoopConf = new Configuration(sparkSession.sharedState.hadoopConf)
+val hadoopConf = new 
Configuration(sparkSession.sparkContext.hadoopConfiguration)
 conf.getAllConfs.foreach { case (k, v) => if (v ne null) hadoopConf.set(k, 
v) }
 hadoopConf
   }

http://git-wip-us.apache.org/repos/asf/spark/blob/b3678eb7/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala
--
diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala
index bc349b4..6c43fe3 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala
@@ -43,23 +43,17 @@ private[sql] class SharedState(val sparkContext: 
SparkContext) extends Logging {
*/
   val listener: SQLListener = createListenerAndUI(sparkContext)
 
-  /**
-   * The base hadoop configuration which is shared among all spark sessions. 
It is based on the
-   * default hadoop configuration of Spark, with custom configurations inside 
`hive-site.xml`.
-   */
-  val hadoopConf: Configuration = {
-val conf = new Configuration(sparkContext.hadoopConfiguration)
+  {
 val configFile = 
Utils.getContextOrSparkClassLoader.getResource("hive-site.xml")
 if (configFile != null) {
-  conf.addResource(configFile)
+  sparkContext.hadoopConfiguration.addResource(configFile)
 }
-conf
   }
 
   /**
* A catalog that interacts with external systems.
*/
-  lazy val externalCatalog: ExternalCatalog = new InMemoryCatalog(hadoopConf)
+  lazy val externalCatalog: ExternalCatalog = new 
InMemoryCatalog(sparkContext.hadoopConfiguration)
 
   /**
* A classloader used to load all user-added jar.
@@ -71,7 +65,7 @@ private[sql] class SharedState(val sparkContext: 
SparkContext) extends Logging {
 // Set the Hive metastore warehouse path to the one we use
 val tempConf = new SQLConf
 sparkContext.conf.getAll.foreach { case (k, v) => 
tempConf.setConfString(k, v) }
-val hiveWarehouseDir = hadoopConf.get("hive.metastore.warehouse.dir")
+val hiveWarehouseDir = 
sparkContext.hadoopConfiguration.get("hive.metastore.warehouse.dir")
 if (h

spark git commit: [SPARK-15991] SparkContext.hadoopConfiguration should be always the base of hadoop conf created by SessionState

2016-06-16 Thread zsxwing
Repository: spark
Updated Branches:
  refs/heads/master 62d2fa5e9 -> d9c6628c4


[SPARK-15991] SparkContext.hadoopConfiguration should be always the base of 
hadoop conf created by SessionState

## What changes were proposed in this pull request?
Before this patch, once a SparkSession had been created, Hadoop configuration values set
directly on SparkContext.hadoopConfiguration did not affect the Hadoop conf
created by SessionState. This patch always uses
SparkContext.hadoopConfiguration as the base.

This patch also changes the behavior of the hive-site.xml support added in
https://github.com/apache/spark/pull/12689/: with this patch, hive-site.xml is loaded
into SparkContext.hadoopConfiguration.

## How was this patch tested?
New test in SparkSessionBuilderSuite.

Author: Yin Huai 

Closes #13711 from yhuai/SPARK-15991.
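
To illustrate the intended behavior, a minimal spark-shell sketch; the key name below is made up, and this mirrors the intent of the new test rather than reproducing it:

```
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder()
  .master("local[2]")
  .appName("hadoop-conf-base-sketch")
  .getOrCreate()

// Set a Hadoop property directly on the shared SparkContext configuration,
// after the session has already been created.
spark.sparkContext.hadoopConfiguration.set("fs.custom.test.key", "some-value")

// Hadoop confs derived per session (e.g. for subsequent file reads) are now
// built with sparkContext.hadoopConfiguration as the base, so the value above
// is visible to them; before this patch it could be silently ignored.
```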


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d9c6628c
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d9c6628c
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d9c6628c

Branch: refs/heads/master
Commit: d9c6628c47de547dc537310e3c775c7f3e0e4a12
Parents: 62d2fa5
Author: Yin Huai 
Authored: Thu Jun 16 17:06:24 2016 -0700
Committer: Shixiong Zhu 
Committed: Thu Jun 16 17:06:24 2016 -0700

--
 .../spark/sql/internal/SessionState.scala   |  2 +-
 .../apache/spark/sql/internal/SharedState.scala | 14 --
 .../org/apache/spark/sql/SQLQuerySuite.scala|  4 
 .../spark/sql/SparkSessionBuilderSuite.scala| 20 
 .../apache/spark/sql/hive/HiveSharedState.scala |  5 +++--
 5 files changed, 28 insertions(+), 17 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/d9c6628c/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala
--
diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala
index 59efa81..dc95123 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SessionState.scala
@@ -49,7 +49,7 @@ private[sql] class SessionState(sparkSession: SparkSession) {
   lazy val conf: SQLConf = new SQLConf
 
   def newHadoopConf(): Configuration = {
-val hadoopConf = new Configuration(sparkSession.sharedState.hadoopConf)
+val hadoopConf = new 
Configuration(sparkSession.sparkContext.hadoopConfiguration)
 conf.getAllConfs.foreach { case (k, v) => if (v ne null) hadoopConf.set(k, 
v) }
 hadoopConf
   }

http://git-wip-us.apache.org/repos/asf/spark/blob/d9c6628c/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala
--
diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala
index bc349b4..6c43fe3 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala
@@ -43,23 +43,17 @@ private[sql] class SharedState(val sparkContext: 
SparkContext) extends Logging {
*/
   val listener: SQLListener = createListenerAndUI(sparkContext)
 
-  /**
-   * The base hadoop configuration which is shared among all spark sessions. 
It is based on the
-   * default hadoop configuration of Spark, with custom configurations inside 
`hive-site.xml`.
-   */
-  val hadoopConf: Configuration = {
-val conf = new Configuration(sparkContext.hadoopConfiguration)
+  {
 val configFile = 
Utils.getContextOrSparkClassLoader.getResource("hive-site.xml")
 if (configFile != null) {
-  conf.addResource(configFile)
+  sparkContext.hadoopConfiguration.addResource(configFile)
 }
-conf
   }
 
   /**
* A catalog that interacts with external systems.
*/
-  lazy val externalCatalog: ExternalCatalog = new InMemoryCatalog(hadoopConf)
+  lazy val externalCatalog: ExternalCatalog = new 
InMemoryCatalog(sparkContext.hadoopConfiguration)
 
   /**
* A classloader used to load all user-added jar.
@@ -71,7 +65,7 @@ private[sql] class SharedState(val sparkContext: 
SparkContext) extends Logging {
 // Set the Hive metastore warehouse path to the one we use
 val tempConf = new SQLConf
 sparkContext.conf.getAll.foreach { case (k, v) => 
tempConf.setConfString(k, v) }
-val hiveWarehouseDir = hadoopConf.get("hive.metastore.warehouse.dir")
+val hiveWarehouseDir = 
sparkContext.hadoopConfiguration.get("hive.metastore.warehouse.dir")
 if (hiveWarehouseDir != null && 
!tempConf.contains(SQLConf.WAREHOUSE_PATH.key)) {
   // If hive.metastore.w

spark git commit: [SPARK-15966][DOC] Add closing tag to fix rendering issue for Spark monitoring

2016-06-16 Thread rxin
Repository: spark
Updated Branches:
  refs/heads/branch-2.0 2280ad8a3 -> 8f7138859


[SPARK-15966][DOC] Add closing tag to fix rendering issue for Spark monitoring

## What changes were proposed in this pull request?
Adds the missing closing tag for spark.ui.view.acls.groups

## How was this patch tested?
I built the docs locally and verified the change in the browser.

**Before:**
![image](https://cloud.githubusercontent.com/assets/7732317/16135005/49fc0724-33e6-11e6-9390-98711593fa5b.png)

**After:**
![image](https://cloud.githubusercontent.com/assets/7732317/16135021/62b5c4a8-33e6-11e6-8118-b22fda5c66eb.png)

Author: Dhruve Ashar 

Closes #13719 from dhruve/doc/SPARK-15966.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/8f713885
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/8f713885
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/8f713885

Branch: refs/heads/branch-2.0
Commit: 8f713885963a410571370faa6b147cd0ada3832b
Parents: 2280ad8
Author: Dhruve Ashar 
Authored: Thu Jun 16 16:44:54 2016 -0700
Committer: Reynold Xin 
Committed: Thu Jun 16 16:44:54 2016 -0700

--
 docs/monitoring.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/8f713885/docs/monitoring.md
--
diff --git a/docs/monitoring.md b/docs/monitoring.md
index 78a3470..fa6c899 100644
--- a/docs/monitoring.md
+++ b/docs/monitoring.md
@@ -157,7 +157,7 @@ The history server can be configured as follows:
   If enabled, access control checks are made regardless of what the 
individual application had
   set for spark.ui.acls.enable when the application was run. 
The application owner
   will always have authorization to view their own application and any 
users specified via
-  spark.ui.view.acls and groups specified via 
spark.ui.view.acls.groups
+  spark.ui.view.acls and groups specified via 
spark.ui.view.acls.groups
   when the application was run will also have authorization to view that 
application.
   If disabled, no access control checks are made.
 





[1/2] spark git commit: Preparing Spark release v1.6.2-rc1

2016-06-16 Thread pwendell
Repository: spark
Updated Branches:
  refs/heads/branch-1.6 b8f380f79 -> 4621fe94b


Preparing Spark release v1.6.2-rc1


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4168d9c9
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4168d9c9
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4168d9c9

Branch: refs/heads/branch-1.6
Commit: 4168d9c94a9564f6b3e62f5d669acde13a7c7cf6
Parents: b8f380f
Author: Patrick Wendell 
Authored: Thu Jun 16 16:40:19 2016 -0700
Committer: Patrick Wendell 
Committed: Thu Jun 16 16:40:19 2016 -0700

--
 assembly/pom.xml| 2 +-
 bagel/pom.xml   | 2 +-
 core/pom.xml| 2 +-
 docker-integration-tests/pom.xml| 2 +-
 examples/pom.xml| 2 +-
 external/flume-assembly/pom.xml | 2 +-
 external/flume-sink/pom.xml | 2 +-
 external/flume/pom.xml  | 2 +-
 external/kafka-assembly/pom.xml | 2 +-
 external/kafka/pom.xml  | 2 +-
 external/mqtt-assembly/pom.xml  | 2 +-
 external/mqtt/pom.xml   | 2 +-
 external/twitter/pom.xml| 2 +-
 external/zeromq/pom.xml | 2 +-
 extras/java8-tests/pom.xml  | 2 +-
 extras/kinesis-asl-assembly/pom.xml | 2 +-
 extras/kinesis-asl/pom.xml  | 2 +-
 extras/spark-ganglia-lgpl/pom.xml   | 2 +-
 graphx/pom.xml  | 2 +-
 launcher/pom.xml| 2 +-
 mllib/pom.xml   | 2 +-
 network/common/pom.xml  | 2 +-
 network/shuffle/pom.xml | 2 +-
 network/yarn/pom.xml| 2 +-
 pom.xml | 2 +-
 repl/pom.xml| 2 +-
 sql/catalyst/pom.xml| 2 +-
 sql/core/pom.xml| 2 +-
 sql/hive-thriftserver/pom.xml   | 2 +-
 sql/hive/pom.xml| 2 +-
 streaming/pom.xml   | 2 +-
 tags/pom.xml| 2 +-
 tools/pom.xml   | 2 +-
 unsafe/pom.xml  | 2 +-
 yarn/pom.xml| 2 +-
 35 files changed, 35 insertions(+), 35 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/4168d9c9/assembly/pom.xml
--
diff --git a/assembly/pom.xml b/assembly/pom.xml
index 6ec2ca4..438e6ed 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -21,7 +21,7 @@
   
 org.apache.spark
 spark-parent_2.10
-1.6.3-SNAPSHOT
+1.6.2
 ../pom.xml
   
 

http://git-wip-us.apache.org/repos/asf/spark/blob/4168d9c9/bagel/pom.xml
--
diff --git a/bagel/pom.xml b/bagel/pom.xml
index 2d778c5..85be37f 100644
--- a/bagel/pom.xml
+++ b/bagel/pom.xml
@@ -21,7 +21,7 @@
   
 org.apache.spark
 spark-parent_2.10
-1.6.3-SNAPSHOT
+1.6.2
 ../pom.xml
   
 

http://git-wip-us.apache.org/repos/asf/spark/blob/4168d9c9/core/pom.xml
--
diff --git a/core/pom.xml b/core/pom.xml
index a8d7863..15e60a3 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -21,7 +21,7 @@
   
 org.apache.spark
 spark-parent_2.10
-1.6.3-SNAPSHOT
+1.6.2
 ../pom.xml
   
 

http://git-wip-us.apache.org/repos/asf/spark/blob/4168d9c9/docker-integration-tests/pom.xml
--
diff --git a/docker-integration-tests/pom.xml b/docker-integration-tests/pom.xml
index a06e59c..0bc749f 100644
--- a/docker-integration-tests/pom.xml
+++ b/docker-integration-tests/pom.xml
@@ -22,7 +22,7 @@
   
 org.apache.spark
 spark-parent_2.10
-1.6.3-SNAPSHOT
+1.6.2
 ../pom.xml
   
 

http://git-wip-us.apache.org/repos/asf/spark/blob/4168d9c9/examples/pom.xml
--
diff --git a/examples/pom.xml b/examples/pom.xml
index 8e9e02e..f771a36 100644
--- a/examples/pom.xml
+++ b/examples/pom.xml
@@ -21,7 +21,7 @@
   
 org.apache.spark
 spark-parent_2.10
-1.6.3-SNAPSHOT
+1.6.2
 ../pom.xml
   
 

http://git-wip-us.apache.org/repos/asf/spark/blob/4168d9c9/external/flume-assembly/pom.xml
--
diff --git a/external/flume-assembly/pom.xml b/external/flume-assembly/pom.xml
index 52c8a91..1ef7e7f 100644
--- a/external/flume-assembly/pom.xml
+++ b/external/flume-assembly/pom.xml
@@ -21,7 +21,7 @@
   
 org.apache.spark
 spark-parent_2.10
-1.6.3-SNAPSHOT
+1.6.2
 ../../pom.xml
   
 

http://git-wip-us.apache.org/repos/asf/spark/blob/4168d9c9/external/flume-sink/pom.xml
--
diff --gi

[2/2] spark git commit: Preparing development version 1.6.3-SNAPSHOT

2016-06-16 Thread pwendell
Preparing development version 1.6.3-SNAPSHOT


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4621fe94
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4621fe94
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4621fe94

Branch: refs/heads/branch-1.6
Commit: 4621fe94b0bdcbb0634e41db926d1d9a98e5014e
Parents: 4168d9c
Author: Patrick Wendell 
Authored: Thu Jun 16 16:40:26 2016 -0700
Committer: Patrick Wendell 
Committed: Thu Jun 16 16:40:26 2016 -0700

--
 assembly/pom.xml| 2 +-
 bagel/pom.xml   | 2 +-
 core/pom.xml| 2 +-
 docker-integration-tests/pom.xml| 2 +-
 examples/pom.xml| 2 +-
 external/flume-assembly/pom.xml | 2 +-
 external/flume-sink/pom.xml | 2 +-
 external/flume/pom.xml  | 2 +-
 external/kafka-assembly/pom.xml | 2 +-
 external/kafka/pom.xml  | 2 +-
 external/mqtt-assembly/pom.xml  | 2 +-
 external/mqtt/pom.xml   | 2 +-
 external/twitter/pom.xml| 2 +-
 external/zeromq/pom.xml | 2 +-
 extras/java8-tests/pom.xml  | 2 +-
 extras/kinesis-asl-assembly/pom.xml | 2 +-
 extras/kinesis-asl/pom.xml  | 2 +-
 extras/spark-ganglia-lgpl/pom.xml   | 2 +-
 graphx/pom.xml  | 2 +-
 launcher/pom.xml| 2 +-
 mllib/pom.xml   | 2 +-
 network/common/pom.xml  | 2 +-
 network/shuffle/pom.xml | 2 +-
 network/yarn/pom.xml| 2 +-
 pom.xml | 2 +-
 repl/pom.xml| 2 +-
 sql/catalyst/pom.xml| 2 +-
 sql/core/pom.xml| 2 +-
 sql/hive-thriftserver/pom.xml   | 2 +-
 sql/hive/pom.xml| 2 +-
 streaming/pom.xml   | 2 +-
 tags/pom.xml| 2 +-
 tools/pom.xml   | 2 +-
 unsafe/pom.xml  | 2 +-
 yarn/pom.xml| 2 +-
 35 files changed, 35 insertions(+), 35 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/4621fe94/assembly/pom.xml
--
diff --git a/assembly/pom.xml b/assembly/pom.xml
index 438e6ed..6ec2ca4 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -21,7 +21,7 @@
   
 org.apache.spark
 spark-parent_2.10
-1.6.2
+1.6.3-SNAPSHOT
 ../pom.xml
   
 

http://git-wip-us.apache.org/repos/asf/spark/blob/4621fe94/bagel/pom.xml
--
diff --git a/bagel/pom.xml b/bagel/pom.xml
index 85be37f..2d778c5 100644
--- a/bagel/pom.xml
+++ b/bagel/pom.xml
@@ -21,7 +21,7 @@
   
 org.apache.spark
 spark-parent_2.10
-1.6.2
+1.6.3-SNAPSHOT
 ../pom.xml
   
 

http://git-wip-us.apache.org/repos/asf/spark/blob/4621fe94/core/pom.xml
--
diff --git a/core/pom.xml b/core/pom.xml
index 15e60a3..a8d7863 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -21,7 +21,7 @@
   
 org.apache.spark
 spark-parent_2.10
-1.6.2
+1.6.3-SNAPSHOT
 ../pom.xml
   
 

http://git-wip-us.apache.org/repos/asf/spark/blob/4621fe94/docker-integration-tests/pom.xml
--
diff --git a/docker-integration-tests/pom.xml b/docker-integration-tests/pom.xml
index 0bc749f..a06e59c 100644
--- a/docker-integration-tests/pom.xml
+++ b/docker-integration-tests/pom.xml
@@ -22,7 +22,7 @@
   
 org.apache.spark
 spark-parent_2.10
-1.6.2
+1.6.3-SNAPSHOT
 ../pom.xml
   
 

http://git-wip-us.apache.org/repos/asf/spark/blob/4621fe94/examples/pom.xml
--
diff --git a/examples/pom.xml b/examples/pom.xml
index f771a36..8e9e02e 100644
--- a/examples/pom.xml
+++ b/examples/pom.xml
@@ -21,7 +21,7 @@
   
 org.apache.spark
 spark-parent_2.10
-1.6.2
+1.6.3-SNAPSHOT
 ../pom.xml
   
 

http://git-wip-us.apache.org/repos/asf/spark/blob/4621fe94/external/flume-assembly/pom.xml
--
diff --git a/external/flume-assembly/pom.xml b/external/flume-assembly/pom.xml
index 1ef7e7f..52c8a91 100644
--- a/external/flume-assembly/pom.xml
+++ b/external/flume-assembly/pom.xml
@@ -21,7 +21,7 @@
   
 org.apache.spark
 spark-parent_2.10
-1.6.2
+1.6.3-SNAPSHOT
 ../../pom.xml
   
 

http://git-wip-us.apache.org/repos/asf/spark/blob/4621fe94/external/flume-sink/pom.xml
--
diff --git a/external/flume-sink/pom.xml b/external/flume-sink/pom.xml
index c1cb64b

[spark] Git Push Summary

2016-06-16 Thread pwendell
Repository: spark
Updated Tags:  refs/tags/v1.6.2-rc1 [created] 4168d9c94




[spark] Git Push Summary

2016-06-16 Thread rxin
Repository: spark
Updated Tags:  refs/tags/v1.6.2 [deleted] f16649304




[2/2] spark git commit: Preparing development version 1.6.3-SNAPSHOT

2016-06-16 Thread pwendell
Preparing development version 1.6.3-SNAPSHOT


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b8f380f7
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b8f380f7
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b8f380f7

Branch: refs/heads/branch-1.6
Commit: b8f380f79aa46754d308cd9729fa97a76fdb951e
Parents: f166493
Author: Patrick Wendell 
Authored: Thu Jun 16 16:35:51 2016 -0700
Committer: Patrick Wendell 
Committed: Thu Jun 16 16:35:51 2016 -0700

--
 assembly/pom.xml| 2 +-
 bagel/pom.xml   | 2 +-
 core/pom.xml| 2 +-
 docker-integration-tests/pom.xml| 2 +-
 examples/pom.xml| 2 +-
 external/flume-assembly/pom.xml | 2 +-
 external/flume-sink/pom.xml | 2 +-
 external/flume/pom.xml  | 2 +-
 external/kafka-assembly/pom.xml | 2 +-
 external/kafka/pom.xml  | 2 +-
 external/mqtt-assembly/pom.xml  | 2 +-
 external/mqtt/pom.xml   | 2 +-
 external/twitter/pom.xml| 2 +-
 external/zeromq/pom.xml | 2 +-
 extras/java8-tests/pom.xml  | 2 +-
 extras/kinesis-asl-assembly/pom.xml | 2 +-
 extras/kinesis-asl/pom.xml  | 2 +-
 extras/spark-ganglia-lgpl/pom.xml   | 2 +-
 graphx/pom.xml  | 2 +-
 launcher/pom.xml| 2 +-
 mllib/pom.xml   | 2 +-
 network/common/pom.xml  | 2 +-
 network/shuffle/pom.xml | 2 +-
 network/yarn/pom.xml| 2 +-
 pom.xml | 2 +-
 repl/pom.xml| 2 +-
 sql/catalyst/pom.xml| 2 +-
 sql/core/pom.xml| 2 +-
 sql/hive-thriftserver/pom.xml   | 2 +-
 sql/hive/pom.xml| 2 +-
 streaming/pom.xml   | 2 +-
 tags/pom.xml| 2 +-
 tools/pom.xml   | 2 +-
 unsafe/pom.xml  | 2 +-
 yarn/pom.xml| 2 +-
 35 files changed, 35 insertions(+), 35 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/b8f380f7/assembly/pom.xml
--
diff --git a/assembly/pom.xml b/assembly/pom.xml
index 438e6ed..6ec2ca4 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -21,7 +21,7 @@
   
 org.apache.spark
 spark-parent_2.10
-1.6.2
+1.6.3-SNAPSHOT
 ../pom.xml
   
 

http://git-wip-us.apache.org/repos/asf/spark/blob/b8f380f7/bagel/pom.xml
--
diff --git a/bagel/pom.xml b/bagel/pom.xml
index 85be37f..2d778c5 100644
--- a/bagel/pom.xml
+++ b/bagel/pom.xml
@@ -21,7 +21,7 @@
   
 org.apache.spark
 spark-parent_2.10
-1.6.2
+1.6.3-SNAPSHOT
 ../pom.xml
   
 

http://git-wip-us.apache.org/repos/asf/spark/blob/b8f380f7/core/pom.xml
--
diff --git a/core/pom.xml b/core/pom.xml
index 15e60a3..a8d7863 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -21,7 +21,7 @@
   
 org.apache.spark
 spark-parent_2.10
-1.6.2
+1.6.3-SNAPSHOT
 ../pom.xml
   
 

http://git-wip-us.apache.org/repos/asf/spark/blob/b8f380f7/docker-integration-tests/pom.xml
--
diff --git a/docker-integration-tests/pom.xml b/docker-integration-tests/pom.xml
index 0bc749f..a06e59c 100644
--- a/docker-integration-tests/pom.xml
+++ b/docker-integration-tests/pom.xml
@@ -22,7 +22,7 @@
   
 org.apache.spark
 spark-parent_2.10
-1.6.2
+1.6.3-SNAPSHOT
 ../pom.xml
   
 

http://git-wip-us.apache.org/repos/asf/spark/blob/b8f380f7/examples/pom.xml
--
diff --git a/examples/pom.xml b/examples/pom.xml
index f771a36..8e9e02e 100644
--- a/examples/pom.xml
+++ b/examples/pom.xml
@@ -21,7 +21,7 @@
   
 org.apache.spark
 spark-parent_2.10
-1.6.2
+1.6.3-SNAPSHOT
 ../pom.xml
   
 

http://git-wip-us.apache.org/repos/asf/spark/blob/b8f380f7/external/flume-assembly/pom.xml
--
diff --git a/external/flume-assembly/pom.xml b/external/flume-assembly/pom.xml
index 1ef7e7f..52c8a91 100644
--- a/external/flume-assembly/pom.xml
+++ b/external/flume-assembly/pom.xml
@@ -21,7 +21,7 @@
   
 org.apache.spark
 spark-parent_2.10
-1.6.2
+1.6.3-SNAPSHOT
 ../../pom.xml
   
 

http://git-wip-us.apache.org/repos/asf/spark/blob/b8f380f7/external/flume-sink/pom.xml
--
diff --git a/external/flume-sink/pom.xml b/external/flume-sink/pom.xml
index c1cb64b

[spark] Git Push Summary

2016-06-16 Thread pwendell
Repository: spark
Updated Tags:  refs/tags/v1.6.2 [created] f16649304




[1/2] spark git commit: Preparing Spark release v1.6.2

2016-06-16 Thread pwendell
Repository: spark
Updated Branches:
  refs/heads/branch-1.6 a4485c3b5 -> b8f380f79


Preparing Spark release v1.6.2


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f1664930
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f1664930
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f1664930

Branch: refs/heads/branch-1.6
Commit: f166493046f33b511d0b1f05e83d913741497648
Parents: a4485c3
Author: Patrick Wendell 
Authored: Thu Jun 16 16:35:44 2016 -0700
Committer: Patrick Wendell 
Committed: Thu Jun 16 16:35:44 2016 -0700

--
 assembly/pom.xml| 2 +-
 bagel/pom.xml   | 2 +-
 core/pom.xml| 2 +-
 docker-integration-tests/pom.xml| 2 +-
 examples/pom.xml| 2 +-
 external/flume-assembly/pom.xml | 2 +-
 external/flume-sink/pom.xml | 2 +-
 external/flume/pom.xml  | 2 +-
 external/kafka-assembly/pom.xml | 2 +-
 external/kafka/pom.xml  | 2 +-
 external/mqtt-assembly/pom.xml  | 2 +-
 external/mqtt/pom.xml   | 2 +-
 external/twitter/pom.xml| 2 +-
 external/zeromq/pom.xml | 2 +-
 extras/java8-tests/pom.xml  | 2 +-
 extras/kinesis-asl-assembly/pom.xml | 2 +-
 extras/kinesis-asl/pom.xml  | 2 +-
 extras/spark-ganglia-lgpl/pom.xml   | 2 +-
 graphx/pom.xml  | 2 +-
 launcher/pom.xml| 2 +-
 mllib/pom.xml   | 2 +-
 network/common/pom.xml  | 2 +-
 network/shuffle/pom.xml | 2 +-
 network/yarn/pom.xml| 2 +-
 pom.xml | 2 +-
 repl/pom.xml| 2 +-
 sql/catalyst/pom.xml| 2 +-
 sql/core/pom.xml| 2 +-
 sql/hive-thriftserver/pom.xml   | 2 +-
 sql/hive/pom.xml| 2 +-
 streaming/pom.xml   | 2 +-
 tags/pom.xml| 2 +-
 tools/pom.xml   | 2 +-
 unsafe/pom.xml  | 2 +-
 yarn/pom.xml| 2 +-
 35 files changed, 35 insertions(+), 35 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/f1664930/assembly/pom.xml
--
diff --git a/assembly/pom.xml b/assembly/pom.xml
index ff345226..438e6ed 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -21,7 +21,7 @@
   
 org.apache.spark
 spark-parent_2.10
-1.6.2-SNAPSHOT
+1.6.2
 ../pom.xml
   
 

http://git-wip-us.apache.org/repos/asf/spark/blob/f1664930/bagel/pom.xml
--
diff --git a/bagel/pom.xml b/bagel/pom.xml
index e31cf2f..85be37f 100644
--- a/bagel/pom.xml
+++ b/bagel/pom.xml
@@ -21,7 +21,7 @@
   
 org.apache.spark
 spark-parent_2.10
-1.6.2-SNAPSHOT
+1.6.2
 ../pom.xml
   
 

http://git-wip-us.apache.org/repos/asf/spark/blob/f1664930/core/pom.xml
--
diff --git a/core/pom.xml b/core/pom.xml
index 4f909ba..15e60a3 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -21,7 +21,7 @@
   
 org.apache.spark
 spark-parent_2.10
-1.6.2-SNAPSHOT
+1.6.2
 ../pom.xml
   
 

http://git-wip-us.apache.org/repos/asf/spark/blob/f1664930/docker-integration-tests/pom.xml
--
diff --git a/docker-integration-tests/pom.xml b/docker-integration-tests/pom.xml
index 0cce7ff..0bc749f 100644
--- a/docker-integration-tests/pom.xml
+++ b/docker-integration-tests/pom.xml
@@ -22,7 +22,7 @@
   
 org.apache.spark
 spark-parent_2.10
-1.6.2-SNAPSHOT
+1.6.2
 ../pom.xml
   
 

http://git-wip-us.apache.org/repos/asf/spark/blob/f1664930/examples/pom.xml
--
diff --git a/examples/pom.xml b/examples/pom.xml
index ed97dc2..f771a36 100644
--- a/examples/pom.xml
+++ b/examples/pom.xml
@@ -21,7 +21,7 @@
   
 org.apache.spark
 spark-parent_2.10
-1.6.2-SNAPSHOT
+1.6.2
 ../pom.xml
   
 

http://git-wip-us.apache.org/repos/asf/spark/blob/f1664930/external/flume-assembly/pom.xml
--
diff --git a/external/flume-assembly/pom.xml b/external/flume-assembly/pom.xml
index 0d6b563..1ef7e7f 100644
--- a/external/flume-assembly/pom.xml
+++ b/external/flume-assembly/pom.xml
@@ -21,7 +21,7 @@
   
 org.apache.spark
 spark-parent_2.10
-1.6.2-SNAPSHOT
+1.6.2
 ../../pom.xml
   
 

http://git-wip-us.apache.org/repos/asf/spark/blob/f1664930/external/flume-sink/pom.xml
--
diff --git a

spark git commit: Update branch-1.6 for 1.6.2 release.

2016-06-16 Thread rxin
Repository: spark
Updated Branches:
  refs/heads/branch-1.6 0a8ada506 -> a4485c3b5


Update branch-1.6 for 1.6.2 release.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a4485c3b
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a4485c3b
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a4485c3b

Branch: refs/heads/branch-1.6
Commit: a4485c3b561dbcd43bcb203e8ad139901ed581f0
Parents: 0a8ada5
Author: Reynold Xin 
Authored: Thu Jun 16 16:30:18 2016 -0700
Committer: Reynold Xin 
Committed: Thu Jun 16 16:30:18 2016 -0700

--
 core/src/main/scala/org/apache/spark/package.scala | 2 +-
 docs/_config.yml   | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/a4485c3b/core/src/main/scala/org/apache/spark/package.scala
--
diff --git a/core/src/main/scala/org/apache/spark/package.scala 
b/core/src/main/scala/org/apache/spark/package.scala
index 4e2269f..1499d14 100644
--- a/core/src/main/scala/org/apache/spark/package.scala
+++ b/core/src/main/scala/org/apache/spark/package.scala
@@ -43,5 +43,5 @@ package org.apache
 
 package object spark {
   // For package docs only
-  val SPARK_VERSION = "1.6.1"
+  val SPARK_VERSION = "1.6.2"
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/a4485c3b/docs/_config.yml
--
diff --git a/docs/_config.yml b/docs/_config.yml
index 9334516..c2ecb59 100644
--- a/docs/_config.yml
+++ b/docs/_config.yml
@@ -14,8 +14,8 @@ include:
 
 # These allow the documentation to be updated with newer releases
 # of Spark, Scala, and Mesos.
-SPARK_VERSION: 1.6.1
-SPARK_VERSION_SHORT: 1.6.1
+SPARK_VERSION: 1.6.2
+SPARK_VERSION_SHORT: 1.6.2
 SCALA_BINARY_VERSION: "2.10"
 SCALA_VERSION: "2.10.5"
 MESOS_VERSION: 0.21.0





svn commit: r1748776 [2/2] - in /spark: news/_posts/ site/ site/graphx/ site/mllib/ site/news/ site/releases/ site/screencasts/ site/sql/ site/streaming/

2016-06-16 Thread yhuai
Modified: spark/site/news/spark-tips-from-quantifind.html
URL: 
http://svn.apache.org/viewvc/spark/site/news/spark-tips-from-quantifind.html?rev=1748776&r1=1748775&r2=1748776&view=diff
==
--- spark/site/news/spark-tips-from-quantifind.html (original)
+++ spark/site/news/spark-tips-from-quantifind.html Thu Jun 16 22:14:05 2016
@@ -150,6 +150,9 @@
   Latest News
   
 
+  Call 
for Presentations for Spark Summit EU is Open
+  (Jun 16, 2016)
+
   Preview release of 
Spark 2.0
   (May 26, 2016)
 
@@ -159,9 +162,6 @@
   Spark 1.6.1 
released
   (Mar 09, 2016)
 
-  Submission is open for 
Spark Summit San Francisco
-  (Feb 11, 2016)
-
   
   Archive
 

Modified: spark/site/news/spark-user-survey-and-powered-by-page.html
URL: 
http://svn.apache.org/viewvc/spark/site/news/spark-user-survey-and-powered-by-page.html?rev=1748776&r1=1748775&r2=1748776&view=diff
==
--- spark/site/news/spark-user-survey-and-powered-by-page.html (original)
+++ spark/site/news/spark-user-survey-and-powered-by-page.html Thu Jun 16 
22:14:05 2016
@@ -150,6 +150,9 @@
   Latest News
   
 
+  Call 
for Presentations for Spark Summit EU is Open
+  (Jun 16, 2016)
+
   Preview release of 
Spark 2.0
   (May 26, 2016)
 
@@ -159,9 +162,6 @@
   Spark 1.6.1 
released
   (Mar 09, 2016)
 
-  Submission is open for 
Spark Summit San Francisco
-  (Feb 11, 2016)
-
   
   Archive
 

Modified: spark/site/news/spark-version-0-6-0-released.html
URL: 
http://svn.apache.org/viewvc/spark/site/news/spark-version-0-6-0-released.html?rev=1748776&r1=1748775&r2=1748776&view=diff
==
--- spark/site/news/spark-version-0-6-0-released.html (original)
+++ spark/site/news/spark-version-0-6-0-released.html Thu Jun 16 22:14:05 2016
@@ -150,6 +150,9 @@
   Latest News
   
 
+  Call 
for Presentations for Spark Summit EU is Open
+  (Jun 16, 2016)
+
   Preview release of 
Spark 2.0
   (May 26, 2016)
 
@@ -159,9 +162,6 @@
   Spark 1.6.1 
released
   (Mar 09, 2016)
 
-  Submission is open for 
Spark Summit San Francisco
-  (Feb 11, 2016)
-
   
   Archive
 

Modified: spark/site/news/spark-wins-daytona-gray-sort-100tb-benchmark.html
URL: 
http://svn.apache.org/viewvc/spark/site/news/spark-wins-daytona-gray-sort-100tb-benchmark.html?rev=1748776&r1=1748775&r2=1748776&view=diff
==
--- spark/site/news/spark-wins-daytona-gray-sort-100tb-benchmark.html (original)
+++ spark/site/news/spark-wins-daytona-gray-sort-100tb-benchmark.html Thu Jun 
16 22:14:05 2016
@@ -150,6 +150,9 @@
   Latest News
   
 
+  Call 
for Presentations for Spark Summit EU is Open
+  (Jun 16, 2016)
+
   Preview release of 
Spark 2.0
   (May 26, 2016)
 
@@ -159,9 +162,6 @@
   Spark 1.6.1 
released
   (Mar 09, 2016)
 
-  Submission is open for 
Spark Summit San Francisco
-  (Feb 11, 2016)
-
   
   Archive
 

Modified: spark/site/news/strata-exercises-now-available-online.html
URL: 
http://svn.apache.org/viewvc/spark/site/news/strata-exercises-now-available-online.html?rev=1748776&r1=1748775&r2=1748776&view=diff
==
--- spark/site/news/strata-exercises-now-available-online.html (original)
+++ spark/site/news/strata-exercises-now-available-online.html Thu Jun 16 
22:14:05 2016
@@ -150,6 +150,9 @@
   Latest News
   
 
+  Call 
for Presentations for Spark Summit EU is Open
+  (Jun 16, 2016)
+
   Preview release of 
Spark 2.0
   (May 26, 2016)
 
@@ -159,9 +162,6 @@
   Spark 1.6.1 
released
   (Mar 09, 2016)
 
-  Submission is open for 
Spark Summit San Francisco
-  (Feb 11, 2016)
-
   
   Archive
 

Modified: spark/site/news/submit-talks-to-spark-summit-2014.html
URL: 
http://svn.apache.org/viewvc/spark/site/news/submit-talks-to-spark-summit-2014.html?rev=1748776&r1=1748775&r2=1748776&view=diff
==
--- spark/site/news/submit-talks-to-spark-summit-2014.html (original)
+++ spark/site/news/submit-talks-to-spark-summit-2014.html Thu Jun 16 22:14:05 
2016
@@ -150,6 +150,9 @@
   Latest News
   
 
+  Call 
for Presentations for Spark Summit EU is Open
+

svn commit: r1748776 [1/2] - in /spark: news/_posts/ site/ site/graphx/ site/mllib/ site/news/ site/releases/ site/screencasts/ site/sql/ site/streaming/

2016-06-16 Thread yhuai
Author: yhuai
Date: Thu Jun 16 22:14:05 2016
New Revision: 1748776

URL: http://svn.apache.org/viewvc?rev=1748776&view=rev
Log:
Add a new news for CFP of Spark Summit 2016 EU

Added:
spark/news/_posts/2016-06-16-submit-talks-to-spark-summit-eu-2016.md
spark/site/news/submit-talks-to-spark-summit-eu-2016.html
Modified:
spark/site/community.html
spark/site/documentation.html
spark/site/downloads.html
spark/site/examples.html
spark/site/faq.html
spark/site/graphx/index.html
spark/site/index.html
spark/site/mailing-lists.html
spark/site/mllib/index.html
spark/site/news/amp-camp-2013-registration-ope.html
spark/site/news/announcing-the-first-spark-summit.html
spark/site/news/fourth-spark-screencast-published.html
spark/site/news/index.html
spark/site/news/nsdi-paper.html
spark/site/news/one-month-to-spark-summit-2015.html
spark/site/news/proposals-open-for-spark-summit-east.html
spark/site/news/registration-open-for-spark-summit-east.html
spark/site/news/run-spark-and-shark-on-amazon-emr.html
spark/site/news/spark-0-6-1-and-0-5-2-released.html
spark/site/news/spark-0-6-2-released.html
spark/site/news/spark-0-7-0-released.html
spark/site/news/spark-0-7-2-released.html
spark/site/news/spark-0-7-3-released.html
spark/site/news/spark-0-8-0-released.html
spark/site/news/spark-0-8-1-released.html
spark/site/news/spark-0-9-0-released.html
spark/site/news/spark-0-9-1-released.html
spark/site/news/spark-0-9-2-released.html
spark/site/news/spark-1-0-0-released.html
spark/site/news/spark-1-0-1-released.html
spark/site/news/spark-1-0-2-released.html
spark/site/news/spark-1-1-0-released.html
spark/site/news/spark-1-1-1-released.html
spark/site/news/spark-1-2-0-released.html
spark/site/news/spark-1-2-1-released.html
spark/site/news/spark-1-2-2-released.html
spark/site/news/spark-1-3-0-released.html
spark/site/news/spark-1-4-0-released.html
spark/site/news/spark-1-4-1-released.html
spark/site/news/spark-1-5-0-released.html
spark/site/news/spark-1-5-1-released.html
spark/site/news/spark-1-5-2-released.html
spark/site/news/spark-1-6-0-released.html
spark/site/news/spark-1-6-1-released.html
spark/site/news/spark-2.0.0-preview.html
spark/site/news/spark-accepted-into-apache-incubator.html
spark/site/news/spark-and-shark-in-the-news.html
spark/site/news/spark-becomes-tlp.html
spark/site/news/spark-featured-in-wired.html
spark/site/news/spark-mailing-lists-moving-to-apache.html
spark/site/news/spark-meetups.html
spark/site/news/spark-screencasts-published.html
spark/site/news/spark-summit-2013-is-a-wrap.html
spark/site/news/spark-summit-2014-videos-posted.html
spark/site/news/spark-summit-2015-videos-posted.html
spark/site/news/spark-summit-agenda-posted.html
spark/site/news/spark-summit-east-2015-videos-posted.html
spark/site/news/spark-summit-east-2016-cfp-closing.html
spark/site/news/spark-summit-east-agenda-posted.html
spark/site/news/spark-summit-europe-agenda-posted.html
spark/site/news/spark-summit-europe.html
spark/site/news/spark-summit-june-2016-agenda-posted.html
spark/site/news/spark-tips-from-quantifind.html
spark/site/news/spark-user-survey-and-powered-by-page.html
spark/site/news/spark-version-0-6-0-released.html
spark/site/news/spark-wins-daytona-gray-sort-100tb-benchmark.html
spark/site/news/strata-exercises-now-available-online.html
spark/site/news/submit-talks-to-spark-summit-2014.html
spark/site/news/submit-talks-to-spark-summit-2016.html
spark/site/news/submit-talks-to-spark-summit-east-2016.html
spark/site/news/two-weeks-to-spark-summit-2014.html
spark/site/news/video-from-first-spark-development-meetup.html
spark/site/releases/spark-release-0-3.html
spark/site/releases/spark-release-0-5-0.html
spark/site/releases/spark-release-0-5-1.html
spark/site/releases/spark-release-0-5-2.html
spark/site/releases/spark-release-0-6-0.html
spark/site/releases/spark-release-0-6-1.html
spark/site/releases/spark-release-0-6-2.html
spark/site/releases/spark-release-0-7-0.html
spark/site/releases/spark-release-0-7-2.html
spark/site/releases/spark-release-0-7-3.html
spark/site/releases/spark-release-0-8-0.html
spark/site/releases/spark-release-0-8-1.html
spark/site/releases/spark-release-0-9-0.html
spark/site/releases/spark-release-0-9-1.html
spark/site/releases/spark-release-0-9-2.html
spark/site/releases/spark-release-1-0-0.html
spark/site/releases/spark-release-1-0-1.html
spark/site/releases/spark-release-1-0-2.html
spark/site/releases/spark-release-1-1-0.html
spark/site/releases/spark-release-1-1-1.html
spark/site/releases/spark-release-1-2-0.html
spark/site/releases/spark-release-1-2-1.html
spark/site/releases/spark-release-1-2-2.html
spar

spark git commit: [SPARK-15749][SQL] make the error message more meaningful

2016-06-16 Thread andrewor14
Repository: spark
Updated Branches:
  refs/heads/master e849285df -> 62d2fa5e9


[SPARK-15749][SQL] make the error message more meaningful

## What changes were proposed in this pull request?

For table test1 (C1 varchar (10), C2 varchar (10)), when I insert a row using
```
sqlContext.sql("insert into test1 values ('abc', 'def', 1)")
```
I got error message

```
Exception in thread "main" java.lang.RuntimeException: RelationC1#0,C2#1 
JDBCRelation(test1)
requires that the query in the SELECT clause of the INSERT INTO/OVERWRITE 
statement
generates the same number of columns as its schema.
```
The error message is a little confusing: this simple insert statement doesn't have a
SELECT clause at all.

I will change the error message to a more general one

```
Exception in thread "main" java.lang.RuntimeException: RelationC1#0,C2#1 
JDBCRelation(test1)
requires that the data to be inserted have the same number of columns as the 
target table.
```

## How was this patch tested?

I tested the patch using my simple unit test, but it's a very trivial change 
and I don't think I need to check in any test.

Author: Huaxin Gao 

Closes #13492 from huaxingao/spark-15749.
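
A tiny sketch of how the improved text reads, using the hypothetical helper `mismatchMessage` (the wording follows the diff below; the 2-column target and 3-value insert come from the example above):

```
// Illustrative only: how the improved error text is assembled for a relation
// with `targetCols` columns receiving `insertedCols` values.
def mismatchMessage(relation: String, targetCols: Int, insertedCols: Int): String =
  s"$relation requires that the data to be inserted have the same number of columns as the " +
    s"target table: target table has $targetCols column(s) but " +
    s"the inserted data has $insertedCols column(s)."

println(mismatchMessage("JDBCRelation(test1)", 2, 3))
```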


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/62d2fa5e
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/62d2fa5e
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/62d2fa5e

Branch: refs/heads/master
Commit: 62d2fa5e996d428caaea005041b17ec115473762
Parents: e849285
Author: Huaxin Gao 
Authored: Thu Jun 16 14:37:10 2016 -0700
Committer: Andrew Or 
Committed: Thu Jun 16 14:37:10 2016 -0700

--
 .../org/apache/spark/sql/execution/datasources/rules.scala  | 5 +++--
 .../test/scala/org/apache/spark/sql/sources/InsertSuite.scala   | 2 +-
 2 files changed, 4 insertions(+), 3 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/62d2fa5e/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala
--
diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala
 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala
index 7ac62fb..543389e 100644
--- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala
+++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala
@@ -78,8 +78,9 @@ private[sql] object PreInsertCastAndRename extends 
Rule[LogicalPlan] {
 // schema of the relation.
 if (l.output.size != child.output.size) {
   sys.error(
-s"$l requires that the query in the SELECT clause of the INSERT 
INTO/OVERWRITE " +
-  s"statement generates the same number of columns as its schema.")
+s"$l requires that the data to be inserted have the same number of 
columns as the " +
+  s"target table: target table has ${l.output.size} column(s) but 
" +
+  s"the inserted data has ${child.output.size} column(s).")
 }
 castAndRenameChildOutput(i, l.output, child)
   }

http://git-wip-us.apache.org/repos/asf/spark/blob/62d2fa5e/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala
--
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala
index bade41b..d717955 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala
@@ -95,7 +95,7 @@ class InsertSuite extends DataSourceTest with 
SharedSQLContext {
   """.stripMargin)
 }.getMessage
 assert(
-  message.contains("generates the same number of columns as its schema"),
+  message.contains("requires that the data to be inserted have the same 
number of columns"),
   "SELECT clause generating a different number of columns should not be 
not allowed."
 )
   }





spark git commit: [SPARK-15749][SQL] make the error message more meaningful

2016-06-16 Thread andrewor14
Repository: spark
Updated Branches:
  refs/heads/branch-2.0 27e274c3e -> 2280ad8a3


[SPARK-15749][SQL] make the error message more meaningful

## What changes were proposed in this pull request?

For table test1 (C1 varchar (10), C2 varchar (10)), when I insert a row using
```
sqlContext.sql("insert into test1 values ('abc', 'def', 1)")
```
I got error message

```
Exception in thread "main" java.lang.RuntimeException: RelationC1#0,C2#1 
JDBCRelation(test1)
requires that the query in the SELECT clause of the INSERT INTO/OVERWRITE 
statement
generates the same number of columns as its schema.
```
The error message is a little confusing: this simple insert statement doesn't have a
SELECT clause at all.

I will change the error message to a more general one

```
Exception in thread "main" java.lang.RuntimeException: RelationC1#0,C2#1 
JDBCRelation(test1)
requires that the data to be inserted have the same number of columns as the 
target table.
```

## How was this patch tested?

I tested the patch using my simple unit test, but it's a very trivial change 
and I don't think I need to check in any test.

Author: Huaxin Gao 

Closes #13492 from huaxingao/spark-15749.

(cherry picked from commit 62d2fa5e996d428caaea005041b17ec115473762)
Signed-off-by: Andrew Or 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2280ad8a
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2280ad8a
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2280ad8a

Branch: refs/heads/branch-2.0
Commit: 2280ad8a3ddfff0b7cc10de6eadb2cc93423bbcf
Parents: 27e274c
Author: Huaxin Gao 
Authored: Thu Jun 16 14:37:10 2016 -0700
Committer: Andrew Or 
Committed: Thu Jun 16 14:37:19 2016 -0700

--
 .../org/apache/spark/sql/execution/datasources/rules.scala  | 5 +++--
 .../test/scala/org/apache/spark/sql/sources/InsertSuite.scala   | 2 +-
 2 files changed, 4 insertions(+), 3 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/2280ad8a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala
--
diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala
 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala
index 7ac62fb..543389e 100644
--- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala
+++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala
@@ -78,8 +78,9 @@ private[sql] object PreInsertCastAndRename extends 
Rule[LogicalPlan] {
 // schema of the relation.
 if (l.output.size != child.output.size) {
   sys.error(
-s"$l requires that the query in the SELECT clause of the INSERT 
INTO/OVERWRITE " +
-  s"statement generates the same number of columns as its schema.")
+s"$l requires that the data to be inserted have the same number of 
columns as the " +
+  s"target table: target table has ${l.output.size} column(s) but 
" +
+  s"the inserted data has ${child.output.size} column(s).")
 }
 castAndRenameChildOutput(i, l.output, child)
   }

http://git-wip-us.apache.org/repos/asf/spark/blob/2280ad8a/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala
--
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala
index bade41b..d717955 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala
@@ -95,7 +95,7 @@ class InsertSuite extends DataSourceTest with 
SharedSQLContext {
   """.stripMargin)
 }.getMessage
 assert(
-  message.contains("generates the same number of columns as its schema"),
+  message.contains("requires that the data to be inserted have the same 
number of columns"),
   "SELECT clause generating a different number of columns should not be 
not allowed."
 )
   }





spark git commit: [SPARK-15868][WEB UI] Executors table in Executors tab should sort Executor IDs in numerical order

2016-06-16 Thread andrewor14
Repository: spark
Updated Branches:
  refs/heads/branch-2.0 fb0fab63c -> 27e274c3e


[SPARK-15868][WEB UI] Executors table in Executors tab should sort Executor IDs 
in numerical order

## What changes were proposed in this pull request?

Currently the Executors table sorts by ID using a string sort (since that is
how the ID is stored). Because the ID is a number for every executor other than
the driver, it should be sorted numerically. This change makes both the initial
sort on page load and the table sort use the numeric ID, treating non-numeric
strings (like the driver) as "-1".

## How was this patch tested?

Manually tested and dev/run-tests

![pageload](https://cloud.githubusercontent.com/assets/13952758/16027882/d32edd0a-318e-11e6-9faf-fc972b7c36ab.png)
![sorted](https://cloud.githubusercontent.com/assets/13952758/16027883/d34541c6-318e-11e6-9ed7-6bfc0cd4152e.png)

Author: Alex Bozarth 

Closes #13654 from ajbozarth/spark15868.

(cherry picked from commit e849285df03b1233d5f647f1b6c5a6dad0665855)
Signed-off-by: Andrew Or 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/27e274c3
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/27e274c3
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/27e274c3

Branch: refs/heads/branch-2.0
Commit: 27e274c3e8cad29fc684a1611cef19d60acdfbc0
Parents: fb0fab6
Author: Alex Bozarth 
Authored: Thu Jun 16 14:29:11 2016 -0700
Committer: Andrew Or 
Committed: Thu Jun 16 14:29:21 2016 -0700

--
 .../main/scala/org/apache/spark/ui/exec/ExecutorsPage.scala | 9 +++--
 1 file changed, 7 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/27e274c3/core/src/main/scala/org/apache/spark/ui/exec/ExecutorsPage.scala
--
diff --git a/core/src/main/scala/org/apache/spark/ui/exec/ExecutorsPage.scala 
b/core/src/main/scala/org/apache/spark/ui/exec/ExecutorsPage.scala
index 791dbe5..67deb7b 100644
--- a/core/src/main/scala/org/apache/spark/ui/exec/ExecutorsPage.scala
+++ b/core/src/main/scala/org/apache/spark/ui/exec/ExecutorsPage.scala
@@ -20,6 +20,7 @@ package org.apache.spark.ui.exec
 import java.net.URLEncoder
 import javax.servlet.http.HttpServletRequest
 
+import scala.util.Try
 import scala.xml.Node
 
 import org.apache.spark.status.api.v1.ExecutorSummary
@@ -53,6 +54,9 @@ private[ui] class ExecutorsPage(
   // When GCTimePercent is edited change ToolTips.TASK_TIME to match
   private val GCTimePercent = 0.1
 
+  // a safe String to Int for sorting ids (converts non-numeric Strings to -1)
+  private def idStrToInt(str: String) : Int = Try(str.toInt).getOrElse(-1)
+
   def render(request: HttpServletRequest): Seq[Node] = {
 val (activeExecutorInfo, deadExecutorInfo) = listener.synchronized {
   // The follow codes should be protected by `listener` to make sure no 
executors will be
@@ -69,13 +73,14 @@ private[ui] class ExecutorsPage(
 }
 
 val execInfo = activeExecutorInfo ++ deadExecutorInfo
+implicit val idOrder = Ordering[Int].on((s: String) => 
idStrToInt(s)).reverse
 val execInfoSorted = execInfo.sortBy(_.id)
 val logsExist = execInfo.filter(_.executorLogs.nonEmpty).nonEmpty
 
 val execTable = {
   
 
-  Executor ID
+  Executor ID
   Address
   Status
   RDD Blocks
@@ -136,7 +141,7 @@ private[ui] class ExecutorsPage(
   }
 
 
-  {info.id}
+  {info.id}
   {info.hostPort}
   
 {executorStatus}





spark git commit: [SPARK-15868][WEB UI] Executors table in Executors tab should sort Executor IDs in numerical order

2016-06-16 Thread andrewor14
Repository: spark
Updated Branches:
  refs/heads/master 2d27eb1e7 -> e849285df


[SPARK-15868][WEB UI] Executors table in Executors tab should sort Executor IDs 
in numerical order

## What changes were proposed in this pull request?

Currently the Executors table sorts by ID using a string sort (since that is
how the ID is stored). Because the ID is a number for every executor other than
the driver, it should be sorted numerically. This change makes both the initial
sort on page load and the table sort use the numeric ID, treating non-numeric
strings (like the driver) as "-1".

## How was this patch tested?

Manually tested and dev/run-tests

![pageload](https://cloud.githubusercontent.com/assets/13952758/16027882/d32edd0a-318e-11e6-9faf-fc972b7c36ab.png)
![sorted](https://cloud.githubusercontent.com/assets/13952758/16027883/d34541c6-318e-11e6-9ed7-6bfc0cd4152e.png)

Author: Alex Bozarth 

Closes #13654 from ajbozarth/spark15868.
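
A small sketch of the resulting sort behavior; the helper mirrors the one added in the diff below, and the sample IDs are invented for illustration:

```
import scala.util.Try

// Non-numeric executor IDs (such as "driver") map to -1 for sorting purposes.
def idStrToInt(str: String): Int = Try(str.toInt).getOrElse(-1)

val ids = Seq("10", "2", "driver", "1")
println(ids.sortBy(idStrToInt))
// List(driver, 1, 2, 10): numeric order (a plain string sort would give
// List(1, 10, 2, driver))
```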


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e849285d
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e849285d
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e849285d

Branch: refs/heads/master
Commit: e849285df03b1233d5f647f1b6c5a6dad0665855
Parents: 2d27eb1
Author: Alex Bozarth 
Authored: Thu Jun 16 14:29:11 2016 -0700
Committer: Andrew Or 
Committed: Thu Jun 16 14:29:11 2016 -0700

--
 .../main/scala/org/apache/spark/ui/exec/ExecutorsPage.scala | 9 +++--
 1 file changed, 7 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/e849285d/core/src/main/scala/org/apache/spark/ui/exec/ExecutorsPage.scala
--
diff --git a/core/src/main/scala/org/apache/spark/ui/exec/ExecutorsPage.scala 
b/core/src/main/scala/org/apache/spark/ui/exec/ExecutorsPage.scala
index 791dbe5..67deb7b 100644
--- a/core/src/main/scala/org/apache/spark/ui/exec/ExecutorsPage.scala
+++ b/core/src/main/scala/org/apache/spark/ui/exec/ExecutorsPage.scala
@@ -20,6 +20,7 @@ package org.apache.spark.ui.exec
 import java.net.URLEncoder
 import javax.servlet.http.HttpServletRequest
 
+import scala.util.Try
 import scala.xml.Node
 
 import org.apache.spark.status.api.v1.ExecutorSummary
@@ -53,6 +54,9 @@ private[ui] class ExecutorsPage(
   // When GCTimePercent is edited change ToolTips.TASK_TIME to match
   private val GCTimePercent = 0.1
 
+  // a safe String to Int for sorting ids (converts non-numeric Strings to -1)
+  private def idStrToInt(str: String) : Int = Try(str.toInt).getOrElse(-1)
+
   def render(request: HttpServletRequest): Seq[Node] = {
 val (activeExecutorInfo, deadExecutorInfo) = listener.synchronized {
   // The follow codes should be protected by `listener` to make sure no 
executors will be
@@ -69,13 +73,14 @@ private[ui] class ExecutorsPage(
 }
 
 val execInfo = activeExecutorInfo ++ deadExecutorInfo
+implicit val idOrder = Ordering[Int].on((s: String) => 
idStrToInt(s)).reverse
 val execInfoSorted = execInfo.sortBy(_.id)
 val logsExist = execInfo.filter(_.executorLogs.nonEmpty).nonEmpty
 
 val execTable = {
   
 
-  Executor ID
+  Executor ID
   Address
   Status
   RDD Blocks
@@ -136,7 +141,7 @@ private[ui] class ExecutorsPage(
   }
 
 
-  {info.id}
+  {info.id}
   {info.hostPort}
   
 {executorStatus}





spark git commit: [MINOR][DOCS][SQL] Fix some comments about types (TypeCoercion, Partition) and exceptions.

2016-06-16 Thread andrewor14
Repository: spark
Updated Branches:
  refs/heads/branch-2.0 7d8cddfb4 -> fb0fab63c


[MINOR][DOCS][SQL] Fix some comments about types (TypeCoercion, Partition) and
exceptions.

## What changes were proposed in this pull request?

This PR contains a few changes to code comments.
- `HiveTypeCoercion` has been renamed to `TypeCoercion`.
- `NoSuchDatabaseException` is only used for the absence of a database.
- For partition type inference, only `DoubleType` is considered.

## How was this patch tested?

N/A

Author: Dongjoon Hyun 

Closes #13674 from dongjoon-hyun/minor_doc_types.

(cherry picked from commit 2d27eb1e753daefbd311136fc7de1a3e8fb9dc63)
Signed-off-by: Andrew Or 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/fb0fab63
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/fb0fab63
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/fb0fab63

Branch: refs/heads/branch-2.0
Commit: fb0fab63cb005d9efc624aeb0ac85476a9ddc4f4
Parents: 7d8cddf
Author: Dongjoon Hyun 
Authored: Thu Jun 16 14:27:09 2016 -0700
Committer: Andrew Or 
Committed: Thu Jun 16 14:27:17 2016 -0700

--
 .../org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala| 4 ++--
 .../org/apache/spark/sql/catalyst/catalog/ExternalCatalog.scala  | 2 +-
 .../src/main/scala/org/apache/spark/sql/types/Decimal.scala  | 2 +-
 .../spark/sql/execution/datasources/PartitioningUtils.scala  | 2 +-
 4 files changed, 5 insertions(+), 5 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/fb0fab63/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
--
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
index 16df628..baec6d1 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
@@ -73,7 +73,7 @@ object TypeCoercion {
   DoubleType)
 
   /**
-   * Case 1 type widening (see the classdoc comment above for 
HiveTypeCoercion).
+   * Case 1 type widening (see the classdoc comment above for TypeCoercion).
*
* Find the tightest common type of two types that might be used in a binary 
expression.
* This handles all numeric types except fixed-precision decimals 
interacting with each other or
@@ -132,7 +132,7 @@ object TypeCoercion {
   }
 
   /**
-   * Case 2 type widening (see the classdoc comment above for 
HiveTypeCoercion).
+   * Case 2 type widening (see the classdoc comment above for TypeCoercion).
*
* i.e. the main difference with [[findTightestCommonTypeOfTwo]] is that 
here we allow some
* loss of precision when widening decimal and double.

http://git-wip-us.apache.org/repos/asf/spark/blob/fb0fab63/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalog.scala
--
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalog.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalog.scala
index 81974b2..6714846 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalog.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalog.scala
@@ -27,7 +27,7 @@ import 
org.apache.spark.sql.catalyst.analysis.NoSuchDatabaseException
  * can be accessed in multiple threads. This is an external catalog because it 
is expected to
  * interact with external systems.
  *
- * Implementations should throw [[NoSuchDatabaseException]] when table or 
database don't exist.
+ * Implementations should throw [[NoSuchDatabaseException]] when databases 
don't exist.
  */
 abstract class ExternalCatalog {
   import CatalogTypes.TablePartitionSpec

http://git-wip-us.apache.org/repos/asf/spark/blob/fb0fab63/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala
--
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala
index 52e0210..cc8175c 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala
@@ -322,7 +322,7 @@ final class Decimal extends Ordered[Decimal] with 
Serializable {
 }
   }
 
-  // HiveTypeCoercion will take care of the precision, scale of result
+  // TypeCoercion will take care of the precision, scale of result
   def * (that: Decima

spark git commit: [MINOR][DOCS][SQL] Fix some comments about types(TypeCoercion, Partition) and exceptions.

2016-06-16 Thread andrewor14
Repository: spark
Updated Branches:
  refs/heads/master 796429d71 -> 2d27eb1e7


[MINOR][DOCS][SQL] Fix some comments about types(TypeCoercion,Partition) and 
exceptions.

## What changes were proposed in this pull request?

This PR contains a few changes to code comments.
- `HiveTypeCoercion` is renamed into `TypeCoercion`.
- `NoSuchDatabaseException` is only used for the absence of database.
- For partition type inference, only `DoubleType` is considered.

## How was this patch tested?

N/A

Author: Dongjoon Hyun 

Closes #13674 from dongjoon-hyun/minor_doc_types.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2d27eb1e
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2d27eb1e
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2d27eb1e

Branch: refs/heads/master
Commit: 2d27eb1e753daefbd311136fc7de1a3e8fb9dc63
Parents: 796429d
Author: Dongjoon Hyun 
Authored: Thu Jun 16 14:27:09 2016 -0700
Committer: Andrew Or 
Committed: Thu Jun 16 14:27:09 2016 -0700

--
 .../org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala| 4 ++--
 .../org/apache/spark/sql/catalyst/catalog/ExternalCatalog.scala  | 2 +-
 .../src/main/scala/org/apache/spark/sql/types/Decimal.scala  | 2 +-
 .../spark/sql/execution/datasources/PartitioningUtils.scala  | 2 +-
 4 files changed, 5 insertions(+), 5 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/2d27eb1e/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
--
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
index 16df628..baec6d1 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
@@ -73,7 +73,7 @@ object TypeCoercion {
   DoubleType)
 
   /**
-   * Case 1 type widening (see the classdoc comment above for 
HiveTypeCoercion).
+   * Case 1 type widening (see the classdoc comment above for TypeCoercion).
*
* Find the tightest common type of two types that might be used in a binary 
expression.
* This handles all numeric types except fixed-precision decimals 
interacting with each other or
@@ -132,7 +132,7 @@ object TypeCoercion {
   }
 
   /**
-   * Case 2 type widening (see the classdoc comment above for 
HiveTypeCoercion).
+   * Case 2 type widening (see the classdoc comment above for TypeCoercion).
*
* i.e. the main difference with [[findTightestCommonTypeOfTwo]] is that 
here we allow some
* loss of precision when widening decimal and double.

http://git-wip-us.apache.org/repos/asf/spark/blob/2d27eb1e/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalog.scala
--
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalog.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalog.scala
index 81974b2..6714846 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalog.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/ExternalCatalog.scala
@@ -27,7 +27,7 @@ import 
org.apache.spark.sql.catalyst.analysis.NoSuchDatabaseException
  * can be accessed in multiple threads. This is an external catalog because it 
is expected to
  * interact with external systems.
  *
- * Implementations should throw [[NoSuchDatabaseException]] when table or 
database don't exist.
+ * Implementations should throw [[NoSuchDatabaseException]] when databases 
don't exist.
  */
 abstract class ExternalCatalog {
   import CatalogTypes.TablePartitionSpec

http://git-wip-us.apache.org/repos/asf/spark/blob/2d27eb1e/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala
--
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala
index 52e0210..cc8175c 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/Decimal.scala
@@ -322,7 +322,7 @@ final class Decimal extends Ordered[Decimal] with 
Serializable {
 }
   }
 
-  // HiveTypeCoercion will take care of the precision, scale of result
+  // TypeCoercion will take care of the precision, scale of result
   def * (that: Decimal): Decimal =
 Decimal(toJavaBigDecimal.multiply(that.toJavaBigDecimal, MATH_CONTEXT))
 

http://git

spark git commit: [SPARK-15998][SQL] Verification of SQLConf HIVE_METASTORE_PARTITION_PRUNING

2016-06-16 Thread andrewor14
Repository: spark
Updated Branches:
  refs/heads/branch-2.0 1230516d9 -> 7d8cddfb4


[SPARK-15998][SQL] Verification of SQLConf HIVE_METASTORE_PARTITION_PRUNING

 What changes were proposed in this pull request?
`HIVE_METASTORE_PARTITION_PRUNING` is a public `SQLConf`. When `true`, some
predicates are pushed down into the Hive metastore so that non-matching
partitions can be eliminated earlier. The current default value is `false`. To
improve performance, users might turn this parameter on.

So far, the code base has no test case verifying that this `SQLConf` works
properly. This PR improves the test coverage to guard against future
regressions.
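
For context, a user would flip this flag through the usual Spark SQL
configuration mechanisms. The sketch below is illustrative only and is not part
of the patch; the literal key name is my assumption for the value behind
`SQLConf.HIVE_METASTORE_PARTITION_PRUNING`, so check `SQLConf` in your Spark
version, and the table name and predicate mirror the new test.

```scala
import org.apache.spark.sql.SparkSession

object MetastorePruningSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("metastore-pruning-sketch")
      .enableHiveSupport()
      // Assumed key for SQLConf.HIVE_METASTORE_PARTITION_PRUNING.
      .config("spark.sql.hive.metastorePartitionPruning", "true")
      .getOrCreate()

    // With the flag on, a partition predicate like this one can be evaluated by
    // the metastore, so only matching partitions are fetched and scanned.
    spark.sql("SELECT id, p2 FROM table_with_partition WHERE p2 <= 'b'").show()
  }
}
```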

 How was this patch tested?
N/A

Author: gatorsmile 

Closes #13716 from gatorsmile/addTestMetastorePartitionPruning.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7d8cddfb
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7d8cddfb
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7d8cddfb

Branch: refs/heads/branch-2.0
Commit: 7d8cddfb495d406b9f2fb5216edd14dea442ec73
Parents: 1230516
Author: gatorsmile 
Authored: Thu Jun 16 14:23:17 2016 -0700
Committer: Andrew Or 
Committed: Thu Jun 16 14:26:46 2016 -0700

--
 .../sql/hive/execution/HiveTableScanSuite.scala | 60 +++-
 1 file changed, 57 insertions(+), 3 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/7d8cddfb/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala
--
diff --git 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala
 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala
index 60f8be5..76d3f3d 100644
--- 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala
+++ 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala
@@ -18,13 +18,14 @@
 package org.apache.spark.sql.hive.execution
 
 import org.apache.spark.sql.Row
-import org.apache.spark.sql.functions._
-import org.apache.spark.sql.hive.test.TestHive
+import org.apache.spark.sql.hive.test.{TestHive, TestHiveSingleton}
 import org.apache.spark.sql.hive.test.TestHive._
 import org.apache.spark.sql.hive.test.TestHive.implicits._
+import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.test.SQLTestUtils
 import org.apache.spark.util.Utils
 
-class HiveTableScanSuite extends HiveComparisonTest {
+class HiveTableScanSuite extends HiveComparisonTest with SQLTestUtils with 
TestHiveSingleton {
 
   createQueryTest("partition_based_table_scan_with_different_serde",
 """
@@ -89,4 +90,57 @@ class HiveTableScanSuite extends HiveComparisonTest {
 assert(sql("select CaseSensitiveColName from spark_4959_2").head() === 
Row("hi"))
 assert(sql("select casesensitivecolname from spark_4959_2").head() === 
Row("hi"))
   }
+
+  private def checkNumScannedPartitions(stmt: String, expectedNumParts: Int): 
Unit = {
+val plan = sql(stmt).queryExecution.sparkPlan
+val numPartitions = plan.collectFirst {
+  case p: HiveTableScanExec =>
+p.relation.getHiveQlPartitions(p.partitionPruningPred).length
+}.getOrElse(0)
+assert(numPartitions == expectedNumParts)
+  }
+
+  test("Verify SQLConf HIVE_METASTORE_PARTITION_PRUNING") {
+val view = "src"
+withTempTable(view) {
+  spark.range(1, 5).createOrReplaceTempView(view)
+  val table = "table_with_partition"
+  withTable(table) {
+sql(
+  s"""
+ |CREATE TABLE $table(id string)
+ |PARTITIONED BY (p1 string,p2 string,p3 string,p4 string,p5 
string)
+   """.stripMargin)
+sql(
+  s"""
+ |FROM $view v
+ |INSERT INTO TABLE $table
+ |PARTITION (p1='a',p2='b',p3='c',p4='d',p5='e')
+ |SELECT v.id
+ |INSERT INTO TABLE $table
+ |PARTITION (p1='a',p2='c',p3='c',p4='d',p5='e')
+ |SELECT v.id
+   """.stripMargin)
+
+Seq("true", "false").foreach { hivePruning =>
+  withSQLConf(SQLConf.HIVE_METASTORE_PARTITION_PRUNING.key -> 
hivePruning) {
+// If the pruning predicate is used, getHiveQlPartitions should 
only return the
+// qualified partition; Otherwise, it return all the partitions.
+val expectedNumPartitions = if (hivePruning == "true") 1 else 2
+checkNumScannedPartitions(
+  stmt = s"SELECT id, p2 FROM $table WHERE p2 <= 'b'", 
expectedNumPartitions)
+  }
+}
+
+Seq("true", "false").foreach { hivePruning =>
+  withSQLConf(SQLConf.HIVE_METASTORE_PARTITION_PRUNING.key -> 
hivePruning) {
+ 

spark git commit: [SPARK-15998][SQL] Verification of SQLConf HIVE_METASTORE_PARTITION_PRUNING

2016-06-16 Thread andrewor14
Repository: spark
Updated Branches:
  refs/heads/master 7a89f2adb -> 796429d71


[SPARK-15998][SQL] Verification of SQLConf HIVE_METASTORE_PARTITION_PRUNING

 What changes were proposed in this pull request?
`HIVE_METASTORE_PARTITION_PRUNING` is a public `SQLConf`. When `true`, some
predicates are pushed down into the Hive metastore so that non-matching
partitions can be eliminated earlier. The current default value is `false`. To
improve performance, users might turn this parameter on.

So far, the code base has no test case verifying that this `SQLConf` works
properly. This PR improves the test coverage to guard against future
regressions.

 How was this patch tested?
N/A

Author: gatorsmile 

Closes #13716 from gatorsmile/addTestMetastorePartitionPruning.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/796429d7
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/796429d7
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/796429d7

Branch: refs/heads/master
Commit: 796429d7117e2544207bd9d67bda8b603cb1a535
Parents: 7a89f2a
Author: gatorsmile 
Authored: Thu Jun 16 14:23:17 2016 -0700
Committer: Andrew Or 
Committed: Thu Jun 16 14:23:17 2016 -0700

--
 .../sql/hive/execution/HiveTableScanSuite.scala | 60 +++-
 1 file changed, 57 insertions(+), 3 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/796429d7/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala
--
diff --git 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala
 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala
index 60f8be5..76d3f3d 100644
--- 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala
+++ 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveTableScanSuite.scala
@@ -18,13 +18,14 @@
 package org.apache.spark.sql.hive.execution
 
 import org.apache.spark.sql.Row
-import org.apache.spark.sql.functions._
-import org.apache.spark.sql.hive.test.TestHive
+import org.apache.spark.sql.hive.test.{TestHive, TestHiveSingleton}
 import org.apache.spark.sql.hive.test.TestHive._
 import org.apache.spark.sql.hive.test.TestHive.implicits._
+import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.test.SQLTestUtils
 import org.apache.spark.util.Utils
 
-class HiveTableScanSuite extends HiveComparisonTest {
+class HiveTableScanSuite extends HiveComparisonTest with SQLTestUtils with 
TestHiveSingleton {
 
   createQueryTest("partition_based_table_scan_with_different_serde",
 """
@@ -89,4 +90,57 @@ class HiveTableScanSuite extends HiveComparisonTest {
 assert(sql("select CaseSensitiveColName from spark_4959_2").head() === 
Row("hi"))
 assert(sql("select casesensitivecolname from spark_4959_2").head() === 
Row("hi"))
   }
+
+  private def checkNumScannedPartitions(stmt: String, expectedNumParts: Int): 
Unit = {
+val plan = sql(stmt).queryExecution.sparkPlan
+val numPartitions = plan.collectFirst {
+  case p: HiveTableScanExec =>
+p.relation.getHiveQlPartitions(p.partitionPruningPred).length
+}.getOrElse(0)
+assert(numPartitions == expectedNumParts)
+  }
+
+  test("Verify SQLConf HIVE_METASTORE_PARTITION_PRUNING") {
+val view = "src"
+withTempTable(view) {
+  spark.range(1, 5).createOrReplaceTempView(view)
+  val table = "table_with_partition"
+  withTable(table) {
+sql(
+  s"""
+ |CREATE TABLE $table(id string)
+ |PARTITIONED BY (p1 string,p2 string,p3 string,p4 string,p5 
string)
+   """.stripMargin)
+sql(
+  s"""
+ |FROM $view v
+ |INSERT INTO TABLE $table
+ |PARTITION (p1='a',p2='b',p3='c',p4='d',p5='e')
+ |SELECT v.id
+ |INSERT INTO TABLE $table
+ |PARTITION (p1='a',p2='c',p3='c',p4='d',p5='e')
+ |SELECT v.id
+   """.stripMargin)
+
+Seq("true", "false").foreach { hivePruning =>
+  withSQLConf(SQLConf.HIVE_METASTORE_PARTITION_PRUNING.key -> 
hivePruning) {
+// If the pruning predicate is used, getHiveQlPartitions should 
only return the
+// qualified partition; Otherwise, it return all the partitions.
+val expectedNumPartitions = if (hivePruning == "true") 1 else 2
+checkNumScannedPartitions(
+  stmt = s"SELECT id, p2 FROM $table WHERE p2 <= 'b'", 
expectedNumPartitions)
+  }
+}
+
+Seq("true", "false").foreach { hivePruning =>
+  withSQLConf(SQLConf.HIVE_METASTORE_PARTITION_PRUNING.key -> 
hivePruning) {
+ 

spark git commit: [SQL] Minor HashAggregateExec string output fixes

2016-06-16 Thread andrewor14
Repository: spark
Updated Branches:
  refs/heads/branch-2.0 938988757 -> 1230516d9


[SQL] Minor HashAggregateExec string output fixes

## What changes were proposed in this pull request?

This PR fixes some minor `.toString` format issues for `HashAggregateExec`.

Before:

```
*HashAggregate(key=[a#234L,b#235L], functions=[count(1),max(c#236L)], 
output=[a#234L,b#235L,count(c)#247L,max(c)#248L])
```

After:

```
*HashAggregate(keys=[a#234L, b#235L], functions=[count(1), max(c#236L)], 
output=[a#234L, b#235L, count(c)#247L, max(c)#248L])
```
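
The only behavioral change is the separator passed to the string builder, so a
plain `mkString` call (not the Spark helper itself) is enough to show what the
old and new arguments produce:

```scala
object SeparatorSketch {
  def main(args: Array[String]): Unit = {
    val keys = Seq("a#234L", "b#235L")
    println("keys=" + keys.mkString("[", ",", "]"))  // keys=[a#234L,b#235L]   (before)
    println("keys=" + keys.mkString("[", ", ", "]")) // keys=[a#234L, b#235L]  (after)
  }
}
```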

## How was this patch tested?

Manually tested.

Author: Cheng Lian 

Closes #13710 from liancheng/minor-agg-string-fix.

(cherry picked from commit 7a89f2adbbc82a23f06638806ffc8596a7efe7f3)
Signed-off-by: Andrew Or 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1230516d
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1230516d
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1230516d

Branch: refs/heads/branch-2.0
Commit: 1230516d9314f55183bfa542eb7cdfac9d8dfec5
Parents: 9389887
Author: Cheng Lian 
Authored: Thu Jun 16 14:20:44 2016 -0700
Committer: Andrew Or 
Committed: Thu Jun 16 14:20:52 2016 -0700

--
 .../spark/sql/execution/aggregate/HashAggregateExec.scala | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/1230516d/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala
--
diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala
 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala
index caeeba1..54d7340 100644
--- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala
+++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala
@@ -774,13 +774,13 @@ case class HashAggregateExec(
 
 testFallbackStartsAt match {
   case None =>
-val keyString = Utils.truncatedString(groupingExpressions, "[", ",", 
"]")
-val functionString = Utils.truncatedString(allAggregateExpressions, 
"[", ",", "]")
-val outputString = Utils.truncatedString(output, "[", ",", "]")
+val keyString = Utils.truncatedString(groupingExpressions, "[", ", ", 
"]")
+val functionString = Utils.truncatedString(allAggregateExpressions, 
"[", ", ", "]")
+val outputString = Utils.truncatedString(output, "[", ", ", "]")
 if (verbose) {
-  s"HashAggregate(key=$keyString, functions=$functionString, 
output=$outputString)"
+  s"HashAggregate(keys=$keyString, functions=$functionString, 
output=$outputString)"
 } else {
-  s"HashAggregate(key=$keyString, functions=$functionString)"
+  s"HashAggregate(keys=$keyString, functions=$functionString)"
 }
   case Some(fallbackStartsAt) =>
 s"HashAggregateWithControlledFallback $groupingExpressions " +





spark git commit: [SQL] Minor HashAggregateExec string output fixes

2016-06-16 Thread andrewor14
Repository: spark
Updated Branches:
  refs/heads/master acef843f6 -> 7a89f2adb


[SQL] Minor HashAggregateExec string output fixes

## What changes were proposed in this pull request?

This PR fixes some minor `.toString` format issues for `HashAggregateExec`.

Before:

```
*HashAggregate(key=[a#234L,b#235L], functions=[count(1),max(c#236L)], 
output=[a#234L,b#235L,count(c)#247L,max(c)#248L])
```

After:

```
*HashAggregate(keys=[a#234L, b#235L], functions=[count(1), max(c#236L)], 
output=[a#234L, b#235L, count(c)#247L, max(c)#248L])
```

## How was this patch tested?

Manually tested.

Author: Cheng Lian 

Closes #13710 from liancheng/minor-agg-string-fix.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7a89f2ad
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7a89f2ad
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7a89f2ad

Branch: refs/heads/master
Commit: 7a89f2adbbc82a23f06638806ffc8596a7efe7f3
Parents: acef843
Author: Cheng Lian 
Authored: Thu Jun 16 14:20:44 2016 -0700
Committer: Andrew Or 
Committed: Thu Jun 16 14:20:44 2016 -0700

--
 .../spark/sql/execution/aggregate/HashAggregateExec.scala | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/7a89f2ad/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala
--
diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala
 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala
index caeeba1..54d7340 100644
--- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala
+++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/HashAggregateExec.scala
@@ -774,13 +774,13 @@ case class HashAggregateExec(
 
 testFallbackStartsAt match {
   case None =>
-val keyString = Utils.truncatedString(groupingExpressions, "[", ",", 
"]")
-val functionString = Utils.truncatedString(allAggregateExpressions, 
"[", ",", "]")
-val outputString = Utils.truncatedString(output, "[", ",", "]")
+val keyString = Utils.truncatedString(groupingExpressions, "[", ", ", 
"]")
+val functionString = Utils.truncatedString(allAggregateExpressions, 
"[", ", ", "]")
+val outputString = Utils.truncatedString(output, "[", ", ", "]")
 if (verbose) {
-  s"HashAggregate(key=$keyString, functions=$functionString, 
output=$outputString)"
+  s"HashAggregate(keys=$keyString, functions=$functionString, 
output=$outputString)"
 } else {
-  s"HashAggregate(key=$keyString, functions=$functionString)"
+  s"HashAggregate(keys=$keyString, functions=$functionString)"
 }
   case Some(fallbackStartsAt) =>
 s"HashAggregateWithControlledFallback $groupingExpressions " +





spark git commit: [SPARK-15975] Fix improper Popen retcode code handling in dev/run-tests

2016-06-16 Thread andrewor14
Repository: spark
Updated Branches:
  refs/heads/branch-1.6 cffc0800b -> 0a8ada506


[SPARK-15975] Fix improper Popen retcode code handling in dev/run-tests

In the `dev/run-tests.py` script we check a `Popen.retcode` for success using 
`retcode > 0`, but this is subtly wrong because Popen's return code will be 
negative if the child process was terminated by a signal: 
https://docs.python.org/2/library/subprocess.html#subprocess.Popen.returncode

In order to properly handle signals, we should change this to check `retcode != 
0` instead.

Author: Josh Rosen 

Closes #13692 from JoshRosen/dev-run-tests-return-code-handling.

(cherry picked from commit acef843f67e770f0a2709fb3fbd1a53c200b2bc5)
Signed-off-by: Andrew Or 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0a8ada50
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0a8ada50
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0a8ada50

Branch: refs/heads/branch-1.6
Commit: 0a8ada5064bec22116363f93ed476352776b49e4
Parents: cffc080
Author: Josh Rosen 
Authored: Thu Jun 16 14:18:58 2016 -0700
Committer: Andrew Or 
Committed: Thu Jun 16 14:19:19 2016 -0700

--
 dev/run-tests.py   | 2 +-
 dev/sparktestsupport/shellutils.py | 5 -
 2 files changed, 5 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/0a8ada50/dev/run-tests.py
--
diff --git a/dev/run-tests.py b/dev/run-tests.py
index 4a18d1a..e42e073 100755
--- a/dev/run-tests.py
+++ b/dev/run-tests.py
@@ -284,7 +284,7 @@ def exec_sbt(sbt_args=()):
 print(line, end='')
 retcode = sbt_proc.wait()
 
-if retcode > 0:
+if retcode != 0:
 exit_from_command_with_retcode(sbt_cmd, retcode)
 
 

http://git-wip-us.apache.org/repos/asf/spark/blob/0a8ada50/dev/sparktestsupport/shellutils.py
--
diff --git a/dev/sparktestsupport/shellutils.py 
b/dev/sparktestsupport/shellutils.py
index d280e79..05af871 100644
--- a/dev/sparktestsupport/shellutils.py
+++ b/dev/sparktestsupport/shellutils.py
@@ -53,7 +53,10 @@ else:
 
 
 def exit_from_command_with_retcode(cmd, retcode):
-print("[error] running", ' '.join(cmd), "; received return code", retcode)
+if retcode < 0:
+print("[error] running", ' '.join(cmd), "; process was terminated by 
signal", -retcode)
+else:
+print("[error] running", ' '.join(cmd), "; received return code", 
retcode)
 sys.exit(int(os.environ.get("CURRENT_BLOCK", 255)))
 
 





spark git commit: [SPARK-15975] Fix improper Popen retcode code handling in dev/run-tests

2016-06-16 Thread andrewor14
Repository: spark
Updated Branches:
  refs/heads/branch-2.0 d9dd46edd -> 938988757


[SPARK-15975] Fix improper Popen retcode code handling in dev/run-tests

In the `dev/run-tests.py` script we check a `Popen.retcode` for success using 
`retcode > 0`, but this is subtly wrong because Popen's return code will be 
negative if the child process was terminated by a signal: 
https://docs.python.org/2/library/subprocess.html#subprocess.Popen.returncode

In order to properly handle signals, we should change this to check `retcode != 
0` instead.

Author: Josh Rosen 

Closes #13692 from JoshRosen/dev-run-tests-return-code-handling.

(cherry picked from commit acef843f67e770f0a2709fb3fbd1a53c200b2bc5)
Signed-off-by: Andrew Or 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/93898875
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/93898875
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/93898875

Branch: refs/heads/branch-2.0
Commit: 9389887571705e03d18e695301f0cb0aa5bd9e21
Parents: d9dd46e
Author: Josh Rosen 
Authored: Thu Jun 16 14:18:58 2016 -0700
Committer: Andrew Or 
Committed: Thu Jun 16 14:19:08 2016 -0700

--
 dev/run-tests.py   | 2 +-
 dev/sparktestsupport/shellutils.py | 5 -
 2 files changed, 5 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/93898875/dev/run-tests.py
--
diff --git a/dev/run-tests.py b/dev/run-tests.py
index dcf1be9..930d7f8 100755
--- a/dev/run-tests.py
+++ b/dev/run-tests.py
@@ -294,7 +294,7 @@ def exec_sbt(sbt_args=()):
 print(line, end='')
 retcode = sbt_proc.wait()
 
-if retcode > 0:
+if retcode != 0:
 exit_from_command_with_retcode(sbt_cmd, retcode)
 
 

http://git-wip-us.apache.org/repos/asf/spark/blob/93898875/dev/sparktestsupport/shellutils.py
--
diff --git a/dev/sparktestsupport/shellutils.py 
b/dev/sparktestsupport/shellutils.py
index d280e79..05af871 100644
--- a/dev/sparktestsupport/shellutils.py
+++ b/dev/sparktestsupport/shellutils.py
@@ -53,7 +53,10 @@ else:
 
 
 def exit_from_command_with_retcode(cmd, retcode):
-print("[error] running", ' '.join(cmd), "; received return code", retcode)
+if retcode < 0:
+print("[error] running", ' '.join(cmd), "; process was terminated by 
signal", -retcode)
+else:
+print("[error] running", ' '.join(cmd), "; received return code", 
retcode)
 sys.exit(int(os.environ.get("CURRENT_BLOCK", 255)))
 
 





spark git commit: [SPARK-15975] Fix improper Popen retcode code handling in dev/run-tests

2016-06-16 Thread andrewor14
Repository: spark
Updated Branches:
  refs/heads/branch-1.5 6043fa8df -> 1891e04a6


[SPARK-15975] Fix improper Popen retcode code handling in dev/run-tests

In the `dev/run-tests.py` script we check a `Popen.retcode` for success using 
`retcode > 0`, but this is subtly wrong because Popen's return code will be 
negative if the child process was terminated by a signal: 
https://docs.python.org/2/library/subprocess.html#subprocess.Popen.returncode

In order to properly handle signals, we should change this to check `retcode != 
0` instead.

Author: Josh Rosen 

Closes #13692 from JoshRosen/dev-run-tests-return-code-handling.

(cherry picked from commit acef843f67e770f0a2709fb3fbd1a53c200b2bc5)
Signed-off-by: Andrew Or 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1891e04a
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1891e04a
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1891e04a

Branch: refs/heads/branch-1.5
Commit: 1891e04a6441606f9bb14cf39f06a7d39cce456b
Parents: 6043fa8
Author: Josh Rosen 
Authored: Thu Jun 16 14:18:58 2016 -0700
Committer: Andrew Or 
Committed: Thu Jun 16 14:19:32 2016 -0700

--
 dev/run-tests.py   | 2 +-
 dev/sparktestsupport/shellutils.py | 5 -
 2 files changed, 5 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/1891e04a/dev/run-tests.py
--
diff --git a/dev/run-tests.py b/dev/run-tests.py
index 623b93c..bc54968 100755
--- a/dev/run-tests.py
+++ b/dev/run-tests.py
@@ -255,7 +255,7 @@ def exec_sbt(sbt_args=()):
 print(line, end='')
 retcode = sbt_proc.wait()
 
-if retcode > 0:
+if retcode != 0:
 exit_from_command_with_retcode(sbt_cmd, retcode)
 
 

http://git-wip-us.apache.org/repos/asf/spark/blob/1891e04a/dev/sparktestsupport/shellutils.py
--
diff --git a/dev/sparktestsupport/shellutils.py 
b/dev/sparktestsupport/shellutils.py
index 12bd0bf..af483a9 100644
--- a/dev/sparktestsupport/shellutils.py
+++ b/dev/sparktestsupport/shellutils.py
@@ -23,7 +23,10 @@ import sys
 
 
 def exit_from_command_with_retcode(cmd, retcode):
-print("[error] running", ' '.join(cmd), "; received return code", retcode)
+if retcode < 0:
+print("[error] running", ' '.join(cmd), "; process was terminated by 
signal", -retcode)
+else:
+print("[error] running", ' '.join(cmd), "; received return code", 
retcode)
 sys.exit(int(os.environ.get("CURRENT_BLOCK", 255)))
 
 





spark git commit: [SPARK-15975] Fix improper Popen retcode code handling in dev/run-tests

2016-06-16 Thread andrewor14
Repository: spark
Updated Branches:
  refs/heads/master bbad4cb48 -> acef843f6


[SPARK-15975] Fix improper Popen retcode code handling in dev/run-tests

In the `dev/run-tests.py` script we check a `Popen.retcode` for success using 
`retcode > 0`, but this is subtly wrong because Popen's return code will be 
negative if the child process was terminated by a signal: 
https://docs.python.org/2/library/subprocess.html#subprocess.Popen.returncode

In order to properly handle signals, we should change this to check `retcode != 
0` instead.

Author: Josh Rosen 

Closes #13692 from JoshRosen/dev-run-tests-return-code-handling.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/acef843f
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/acef843f
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/acef843f

Branch: refs/heads/master
Commit: acef843f67e770f0a2709fb3fbd1a53c200b2bc5
Parents: bbad4cb
Author: Josh Rosen 
Authored: Thu Jun 16 14:18:58 2016 -0700
Committer: Andrew Or 
Committed: Thu Jun 16 14:18:58 2016 -0700

--
 dev/run-tests.py   | 2 +-
 dev/sparktestsupport/shellutils.py | 5 -
 2 files changed, 5 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/acef843f/dev/run-tests.py
--
diff --git a/dev/run-tests.py b/dev/run-tests.py
index dcf1be9..930d7f8 100755
--- a/dev/run-tests.py
+++ b/dev/run-tests.py
@@ -294,7 +294,7 @@ def exec_sbt(sbt_args=()):
 print(line, end='')
 retcode = sbt_proc.wait()
 
-if retcode > 0:
+if retcode != 0:
 exit_from_command_with_retcode(sbt_cmd, retcode)
 
 

http://git-wip-us.apache.org/repos/asf/spark/blob/acef843f/dev/sparktestsupport/shellutils.py
--
diff --git a/dev/sparktestsupport/shellutils.py 
b/dev/sparktestsupport/shellutils.py
index d280e79..05af871 100644
--- a/dev/sparktestsupport/shellutils.py
+++ b/dev/sparktestsupport/shellutils.py
@@ -53,7 +53,10 @@ else:
 
 
 def exit_from_command_with_retcode(cmd, retcode):
-print("[error] running", ' '.join(cmd), "; received return code", retcode)
+if retcode < 0:
+print("[error] running", ' '.join(cmd), "; process was terminated by 
signal", -retcode)
+else:
+print("[error] running", ' '.join(cmd), "; received return code", 
retcode)
 sys.exit(int(os.environ.get("CURRENT_BLOCK", 255)))
 
 


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org



spark git commit: [SPARK-15978][SQL] improve 'show tables' command related codes

2016-06-16 Thread andrewor14
Repository: spark
Updated Branches:
  refs/heads/branch-2.0 095ddb4c9 -> d9dd46edd


[SPARK-15978][SQL] improve 'show tables' command related codes

## What changes were proposed in this pull request?

I've found some minor issues in the "show tables" command:

1. In `SessionCatalog.scala`, the `listTables(db: String)` method calls
`listTables(formatDatabaseName(db), "*")` to list all the tables for a given
db, but the method `listTables(db: String, pattern: String)` formats the db
name once more, so the `formatDatabaseName()` call should be removed from the
caller.

2. I suggest sorting the result of `listTables(db: String)` in
`InMemoryCatalog.scala`, just like `listDatabases()` (see the sketch after this
list).
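
A tiny standalone sketch of the second point (the map below is a stand-in for
the catalog's table registry, not the real code): a mutable `HashMap` iterates
its keys in an unspecified order, so sorting makes the listing deterministic,
matching what `listDatabases()` already does.

```scala
import scala.collection.mutable

object SortedListingSketch {
  def main(args: Array[String]): Unit = {
    val tables = mutable.HashMap("t2" -> 2, "t10" -> 10, "t1" -> 1)
    println(tables.keySet.toSeq)        // hash order: unspecified
    println(tables.keySet.toSeq.sorted) // List(t1, t10, t2): deterministic
  }
}
```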

## How was this patch tested?

The existing test cases should cover it.

Author: bomeng 

Closes #13695 from bomeng/SPARK-15978.

(cherry picked from commit bbad4cb48df2ac3ed7edb4c02db79540bd4085d8)
Signed-off-by: Andrew Or 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d9dd46ed
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d9dd46ed
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d9dd46ed

Branch: refs/heads/branch-2.0
Commit: d9dd46edd3635ed79134a1521403c4478a34d3b3
Parents: 095ddb4
Author: bomeng 
Authored: Thu Jun 16 14:18:02 2016 -0700
Committer: Andrew Or 
Committed: Thu Jun 16 14:18:12 2016 -0700

--
 .../org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala| 2 +-
 .../org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/d9dd46ed/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala
--
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala
index 14da30a..fb3e1b3 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala
@@ -286,7 +286,7 @@ class InMemoryCatalog(hadoopConfig: Configuration = new 
Configuration) extends E
 
   override def listTables(db: String): Seq[String] = synchronized {
 requireDbExists(db)
-catalog(db).tables.keySet.toSeq
+catalog(db).tables.keySet.toSeq.sorted
   }
 
   override def listTables(db: String, pattern: String): Seq[String] = 
synchronized {

http://git-wip-us.apache.org/repos/asf/spark/blob/d9dd46ed/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
--
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
index 1ec1bb1..7ab10d1 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
@@ -445,7 +445,7 @@ class SessionCatalog(
   /**
* List all tables in the specified database, including temporary tables.
*/
-  def listTables(db: String): Seq[TableIdentifier] = 
listTables(formatDatabaseName(db), "*")
+  def listTables(db: String): Seq[TableIdentifier] = listTables(db, "*")
 
   /**
* List all matching tables in the specified database, including temporary 
tables.





spark git commit: [SPARK-15978][SQL] improve 'show tables' command related codes

2016-06-16 Thread andrewor14
Repository: spark
Updated Branches:
  refs/heads/master 457126e42 -> bbad4cb48


[SPARK-15978][SQL] improve 'show tables' command related codes

## What changes were proposed in this pull request?

I've found some minor issues in the "show tables" command:

1. In `SessionCatalog.scala`, the `listTables(db: String)` method calls
`listTables(formatDatabaseName(db), "*")` to list all the tables for a given
db, but the method `listTables(db: String, pattern: String)` formats the db
name once more, so the `formatDatabaseName()` call should be removed from the
caller.

2. I suggest sorting the result of `listTables(db: String)` in
`InMemoryCatalog.scala`, just like `listDatabases()`.

## How was this patch tested?

The existing test cases should cover it.

Author: bomeng 

Closes #13695 from bomeng/SPARK-15978.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/bbad4cb4
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/bbad4cb4
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/bbad4cb4

Branch: refs/heads/master
Commit: bbad4cb48df2ac3ed7edb4c02db79540bd4085d8
Parents: 457126e
Author: bomeng 
Authored: Thu Jun 16 14:18:02 2016 -0700
Committer: Andrew Or 
Committed: Thu Jun 16 14:18:02 2016 -0700

--
 .../org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala| 2 +-
 .../org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/bbad4cb4/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala
--
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala
index 14da30a..fb3e1b3 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala
@@ -286,7 +286,7 @@ class InMemoryCatalog(hadoopConfig: Configuration = new 
Configuration) extends E
 
   override def listTables(db: String): Seq[String] = synchronized {
 requireDbExists(db)
-catalog(db).tables.keySet.toSeq
+catalog(db).tables.keySet.toSeq.sorted
   }
 
   override def listTables(db: String, pattern: String): Seq[String] = 
synchronized {

http://git-wip-us.apache.org/repos/asf/spark/blob/bbad4cb4/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
--
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
index 1ec1bb1..7ab10d1 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
@@ -445,7 +445,7 @@ class SessionCatalog(
   /**
* List all tables in the specified database, including temporary tables.
*/
-  def listTables(db: String): Seq[TableIdentifier] = 
listTables(formatDatabaseName(db), "*")
+  def listTables(db: String): Seq[TableIdentifier] = listTables(db, "*")
 
   /**
* List all matching tables in the specified database, including temporary 
tables.





spark git commit: [SPARK-15796][CORE] Reduce spark.memory.fraction default to avoid overrunning old gen in JVM default config

2016-06-16 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/branch-2.0 579268426 -> 095ddb4c9


[SPARK-15796][CORE] Reduce spark.memory.fraction default to avoid overrunning 
old gen in JVM default config

## What changes were proposed in this pull request?

Reduce the `spark.memory.fraction` default to 0.6 in order to make it fit
within the default JVM old generation size (2/3 of the heap). See the JIRA
discussion. This means a full cache doesn't spill into the new gen. CC
andrewor14
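
The arithmetic behind the new default, mirroring the updated comment in
`UnifiedMemoryManager` (numbers assume a 1 GB heap; the override at the end is
just an example of pinning the old value, not a recommendation):

```scala
import org.apache.spark.SparkConf

object MemoryFractionSketch {
  def main(args: Array[String]): Unit = {
    val systemMemoryMb = 1024L
    val reservedMb     = 300L                      // fixed reserved system memory
    val usableMb       = systemMemoryMb - reservedMb
    val unifiedMb      = (usableMb * 0.6).toLong   // 434 MB for execution + storage
    val storageMb      = (unifiedMb * 0.5).toLong  // 217 MB storage region by default
    println(s"unified=$unifiedMb MB, storage=$storageMb MB")

    // Applications that depended on the old behavior can set the fraction back:
    val conf = new SparkConf().set("spark.memory.fraction", "0.75")
    println(conf.get("spark.memory.fraction"))
  }
}
```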

## How was this patch tested?

Jenkins tests.

Author: Sean Owen 

Closes #13618 from srowen/SPARK-15796.

(cherry picked from commit 457126e420e66228cc68def4bc3d87e7a282069a)
Signed-off-by: Sean Owen 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/095ddb4c
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/095ddb4c
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/095ddb4c

Branch: refs/heads/branch-2.0
Commit: 095ddb4c9e7ab9193c15c69eb057a9bb2dbdaed1
Parents: 5792684
Author: Sean Owen 
Authored: Thu Jun 16 23:04:10 2016 +0200
Committer: Sean Owen 
Committed: Thu Jun 16 23:04:19 2016 +0200

--
 .../spark/memory/UnifiedMemoryManager.scala   |  8 
 .../scala/org/apache/spark/DistributedSuite.scala |  2 +-
 docs/configuration.md |  7 ---
 docs/tuning.md| 18 +-
 4 files changed, 26 insertions(+), 9 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/095ddb4c/core/src/main/scala/org/apache/spark/memory/UnifiedMemoryManager.scala
--
diff --git 
a/core/src/main/scala/org/apache/spark/memory/UnifiedMemoryManager.scala 
b/core/src/main/scala/org/apache/spark/memory/UnifiedMemoryManager.scala
index ae747c1..c7b36be 100644
--- a/core/src/main/scala/org/apache/spark/memory/UnifiedMemoryManager.scala
+++ b/core/src/main/scala/org/apache/spark/memory/UnifiedMemoryManager.scala
@@ -25,9 +25,9 @@ import org.apache.spark.storage.BlockId
  * either side can borrow memory from the other.
  *
  * The region shared between execution and storage is a fraction of (the total 
heap space - 300MB)
- * configurable through `spark.memory.fraction` (default 0.75). The position 
of the boundary
+ * configurable through `spark.memory.fraction` (default 0.6). The position of 
the boundary
  * within this space is further determined by `spark.memory.storageFraction` 
(default 0.5).
- * This means the size of the storage region is 0.75 * 0.5 = 0.375 of the heap 
space by default.
+ * This means the size of the storage region is 0.6 * 0.5 = 0.3 of the heap 
space by default.
  *
  * Storage can borrow as much execution memory as is free until execution 
reclaims its space.
  * When this happens, cached blocks will be evicted from memory until 
sufficient borrowed
@@ -187,7 +187,7 @@ object UnifiedMemoryManager {
   // Set aside a fixed amount of memory for non-storage, non-execution 
purposes.
   // This serves a function similar to `spark.memory.fraction`, but guarantees 
that we reserve
   // sufficient memory for the system even for small heaps. E.g. if we have a 
1GB JVM, then
-  // the memory used for execution and storage will be (1024 - 300) * 0.75 = 
543MB by default.
+  // the memory used for execution and storage will be (1024 - 300) * 0.6 = 
434MB by default.
   private val RESERVED_SYSTEM_MEMORY_BYTES = 300 * 1024 * 1024
 
   def apply(conf: SparkConf, numCores: Int): UnifiedMemoryManager = {
@@ -223,7 +223,7 @@ object UnifiedMemoryManager {
   }
 }
 val usableMemory = systemMemory - reservedMemory
-val memoryFraction = conf.getDouble("spark.memory.fraction", 0.75)
+val memoryFraction = conf.getDouble("spark.memory.fraction", 0.6)
 (usableMemory * memoryFraction).toLong
   }
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/095ddb4c/core/src/test/scala/org/apache/spark/DistributedSuite.scala
--
diff --git a/core/src/test/scala/org/apache/spark/DistributedSuite.scala 
b/core/src/test/scala/org/apache/spark/DistributedSuite.scala
index 6e69fc4..0515e6e 100644
--- a/core/src/test/scala/org/apache/spark/DistributedSuite.scala
+++ b/core/src/test/scala/org/apache/spark/DistributedSuite.scala
@@ -223,7 +223,7 @@ class DistributedSuite extends SparkFunSuite with Matchers 
with LocalSparkContex
 
   test("compute when only some partitions fit in memory") {
 val size = 1
-val numPartitions = 10
+val numPartitions = 20
 val conf = new SparkConf()
   .set("spark.storage.unrollMemoryThreshold", "1024")
   .set("spark.testing.memory", size.toString)

http://git-wip-us.apache.org/repos/asf/spark/blob/095ddb4c/docs/configuration.md
--

spark git commit: [SPARK-15796][CORE] Reduce spark.memory.fraction default to avoid overrunning old gen in JVM default config

2016-06-16 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/master 36110a830 -> 457126e42


[SPARK-15796][CORE] Reduce spark.memory.fraction default to avoid overrunning 
old gen in JVM default config

## What changes were proposed in this pull request?

Reduce the `spark.memory.fraction` default to 0.6 in order to make it fit
within the default JVM old generation size (2/3 of the heap). See the JIRA
discussion. This means a full cache doesn't spill into the new gen. CC
andrewor14

## How was this patch tested?

Jenkins tests.

Author: Sean Owen 

Closes #13618 from srowen/SPARK-15796.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/457126e4
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/457126e4
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/457126e4

Branch: refs/heads/master
Commit: 457126e420e66228cc68def4bc3d87e7a282069a
Parents: 36110a8
Author: Sean Owen 
Authored: Thu Jun 16 23:04:10 2016 +0200
Committer: Sean Owen 
Committed: Thu Jun 16 23:04:10 2016 +0200

--
 .../spark/memory/UnifiedMemoryManager.scala   |  8 
 .../scala/org/apache/spark/DistributedSuite.scala |  2 +-
 docs/configuration.md |  7 ---
 docs/tuning.md| 18 +-
 4 files changed, 26 insertions(+), 9 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/457126e4/core/src/main/scala/org/apache/spark/memory/UnifiedMemoryManager.scala
--
diff --git 
a/core/src/main/scala/org/apache/spark/memory/UnifiedMemoryManager.scala 
b/core/src/main/scala/org/apache/spark/memory/UnifiedMemoryManager.scala
index ae747c1..c7b36be 100644
--- a/core/src/main/scala/org/apache/spark/memory/UnifiedMemoryManager.scala
+++ b/core/src/main/scala/org/apache/spark/memory/UnifiedMemoryManager.scala
@@ -25,9 +25,9 @@ import org.apache.spark.storage.BlockId
  * either side can borrow memory from the other.
  *
  * The region shared between execution and storage is a fraction of (the total 
heap space - 300MB)
- * configurable through `spark.memory.fraction` (default 0.75). The position 
of the boundary
+ * configurable through `spark.memory.fraction` (default 0.6). The position of 
the boundary
  * within this space is further determined by `spark.memory.storageFraction` 
(default 0.5).
- * This means the size of the storage region is 0.75 * 0.5 = 0.375 of the heap 
space by default.
+ * This means the size of the storage region is 0.6 * 0.5 = 0.3 of the heap 
space by default.
  *
  * Storage can borrow as much execution memory as is free until execution 
reclaims its space.
  * When this happens, cached blocks will be evicted from memory until 
sufficient borrowed
@@ -187,7 +187,7 @@ object UnifiedMemoryManager {
   // Set aside a fixed amount of memory for non-storage, non-execution 
purposes.
   // This serves a function similar to `spark.memory.fraction`, but guarantees 
that we reserve
   // sufficient memory for the system even for small heaps. E.g. if we have a 
1GB JVM, then
-  // the memory used for execution and storage will be (1024 - 300) * 0.75 = 
543MB by default.
+  // the memory used for execution and storage will be (1024 - 300) * 0.6 = 
434MB by default.
   private val RESERVED_SYSTEM_MEMORY_BYTES = 300 * 1024 * 1024
 
   def apply(conf: SparkConf, numCores: Int): UnifiedMemoryManager = {
@@ -223,7 +223,7 @@ object UnifiedMemoryManager {
   }
 }
 val usableMemory = systemMemory - reservedMemory
-val memoryFraction = conf.getDouble("spark.memory.fraction", 0.75)
+val memoryFraction = conf.getDouble("spark.memory.fraction", 0.6)
 (usableMemory * memoryFraction).toLong
   }
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/457126e4/core/src/test/scala/org/apache/spark/DistributedSuite.scala
--
diff --git a/core/src/test/scala/org/apache/spark/DistributedSuite.scala 
b/core/src/test/scala/org/apache/spark/DistributedSuite.scala
index 6e69fc4..0515e6e 100644
--- a/core/src/test/scala/org/apache/spark/DistributedSuite.scala
+++ b/core/src/test/scala/org/apache/spark/DistributedSuite.scala
@@ -223,7 +223,7 @@ class DistributedSuite extends SparkFunSuite with Matchers 
with LocalSparkContex
 
   test("compute when only some partitions fit in memory") {
 val size = 1
-val numPartitions = 10
+val numPartitions = 20
 val conf = new SparkConf()
   .set("spark.storage.unrollMemoryThreshold", "1024")
   .set("spark.testing.memory", size.toString)

http://git-wip-us.apache.org/repos/asf/spark/blob/457126e4/docs/configuration.md
--
diff --git a/docs/configuration.md b/docs/configuration.md
index 32c3a92..f

spark git commit: [SPARK-15922][MLLIB] `toIndexedRowMatrix` should consider the case `cols < offset+colsPerBlock`

2016-06-16 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/branch-2.0 5b003c9bc -> 579268426


[SPARK-15922][MLLIB] `toIndexedRowMatrix` should consider the case `cols < 
offset+colsPerBlock`

## What changes were proposed in this pull request?

SPARK-15922 reports the following scenario, which throws an exception due to
mismatched vector sizes. This PR handles the edge case `cols < (offset +
colsPerBlock)`.

**Before**
```scala
scala> import org.apache.spark.mllib.linalg.distributed._
scala> import org.apache.spark.mllib.linalg._
scala> val rows = IndexedRow(0L, new DenseVector(Array(1,2,3))) :: 
IndexedRow(1L, new DenseVector(Array(1,2,3))):: IndexedRow(2L, new 
DenseVector(Array(1,2,3))):: Nil
scala> val rdd = sc.parallelize(rows)
scala> val matrix = new IndexedRowMatrix(rdd, 3, 3)
scala> val bmat = matrix.toBlockMatrix
scala> val imat = bmat.toIndexedRowMatrix
scala> imat.rows.collect
... // java.lang.IllegalArgumentException: requirement failed: Vectors must be 
the same length!
```

**After**
```scala
...
scala> imat.rows.collect
res0: Array[org.apache.spark.mllib.linalg.distributed.IndexedRow] = 
Array(IndexedRow(0,[1.0,2.0,3.0]), IndexedRow(1,[1.0,2.0,3.0]), 
IndexedRow(2,[1.0,2.0,3.0]))
```
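
The one-line fix clamps the copy range for the last column block, which can be
narrower than `colsPerBlock`. A tiny range sketch with hypothetical sizes (the
real default `colsPerBlock` is much larger):

```scala
object ClampSliceSketch {
  def main(args: Array[String]): Unit = {
    val cols = 3                   // total columns in the matrix
    val colsPerBlock = 2           // block width
    val offset = colsPerBlock * 1  // start of the second (last) block

    val unclamped = offset until offset + colsPerBlock                 // 2 until 4: overruns a length-3 row
    val clamped   = offset until math.min(cols, offset + colsPerBlock) // 2 until 3: fits
    println(s"unclamped=$unclamped, clamped=$clamped")
  }
}
```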

## How was this patch tested?

Pass the Jenkins tests (including the above case)

Author: Dongjoon Hyun 

Closes #13643 from dongjoon-hyun/SPARK-15922.

(cherry picked from commit 36110a8306608186696c536028d2776e022d305a)
Signed-off-by: Sean Owen 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/57926842
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/57926842
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/57926842

Branch: refs/heads/branch-2.0
Commit: 5792684268b273562e694855eb671c21c4044280
Parents: 5b003c9
Author: Dongjoon Hyun 
Authored: Thu Jun 16 23:02:46 2016 +0200
Committer: Sean Owen 
Committed: Thu Jun 16 23:03:00 2016 +0200

--
 .../org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala | 2 +-
 .../spark/mllib/linalg/distributed/BlockMatrixSuite.scala   | 5 +
 2 files changed, 6 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/57926842/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala
--
diff --git 
a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala
 
b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala
index 7a24617..639295c 100644
--- 
a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala
+++ 
b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala
@@ -288,7 +288,7 @@ class BlockMatrix @Since("1.3.0") (
 
   vectors.foreach { case (blockColIdx: Int, vec: BV[Double]) =>
 val offset = colsPerBlock * blockColIdx
-wholeVector(offset until offset + colsPerBlock) := vec
+wholeVector(offset until Math.min(cols, offset + colsPerBlock)) := vec
   }
   new IndexedRow(rowIdx, Vectors.fromBreeze(wholeVector))
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/57926842/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrixSuite.scala
--
diff --git 
a/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrixSuite.scala
 
b/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrixSuite.scala
index e5a2cbb..61266f3 100644
--- 
a/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrixSuite.scala
+++ 
b/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrixSuite.scala
@@ -135,6 +135,11 @@ class BlockMatrixSuite extends SparkFunSuite with 
MLlibTestSparkContext {
 assert(rowMat.numCols() === n)
 assert(rowMat.toBreeze() === gridBasedMat.toBreeze())
 
+// SPARK-15922: BlockMatrix to IndexedRowMatrix throws an error"
+val bmat = rowMat.toBlockMatrix
+val imat = bmat.toIndexedRowMatrix
+imat.rows.collect
+
 val rows = 1
 val cols = 10
 





spark git commit: [SPARK-15922][MLLIB] `toIndexedRowMatrix` should consider the case `cols < offset+colsPerBlock`

2016-06-16 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/master f9bf15d9b -> 36110a830


[SPARK-15922][MLLIB] `toIndexedRowMatrix` should consider the case `cols < 
offset+colsPerBlock`

## What changes were proposed in this pull request?

SPARK-15922 reports the following scenario, which throws an exception due to
mismatched vector sizes. This PR handles the edge case `cols < (offset +
colsPerBlock)`.

**Before**
```scala
scala> import org.apache.spark.mllib.linalg.distributed._
scala> import org.apache.spark.mllib.linalg._
scala> val rows = IndexedRow(0L, new DenseVector(Array(1,2,3))) :: 
IndexedRow(1L, new DenseVector(Array(1,2,3))):: IndexedRow(2L, new 
DenseVector(Array(1,2,3))):: Nil
scala> val rdd = sc.parallelize(rows)
scala> val matrix = new IndexedRowMatrix(rdd, 3, 3)
scala> val bmat = matrix.toBlockMatrix
scala> val imat = bmat.toIndexedRowMatrix
scala> imat.rows.collect
... // java.lang.IllegalArgumentException: requirement failed: Vectors must be 
the same length!
```

**After**
```scala
...
scala> imat.rows.collect
res0: Array[org.apache.spark.mllib.linalg.distributed.IndexedRow] = 
Array(IndexedRow(0,[1.0,2.0,3.0]), IndexedRow(1,[1.0,2.0,3.0]), 
IndexedRow(2,[1.0,2.0,3.0]))
```

## How was this patch tested?

Pass the Jenkins tests (including the above case)

Author: Dongjoon Hyun 

Closes #13643 from dongjoon-hyun/SPARK-15922.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/36110a83
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/36110a83
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/36110a83

Branch: refs/heads/master
Commit: 36110a8306608186696c536028d2776e022d305a
Parents: f9bf15d
Author: Dongjoon Hyun 
Authored: Thu Jun 16 23:02:46 2016 +0200
Committer: Sean Owen 
Committed: Thu Jun 16 23:02:46 2016 +0200

--
 .../org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala | 2 +-
 .../spark/mllib/linalg/distributed/BlockMatrixSuite.scala   | 5 +
 2 files changed, 6 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/36110a83/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala
--
diff --git 
a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala
 
b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala
index 7a24617..639295c 100644
--- 
a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala
+++ 
b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala
@@ -288,7 +288,7 @@ class BlockMatrix @Since("1.3.0") (
 
   vectors.foreach { case (blockColIdx: Int, vec: BV[Double]) =>
 val offset = colsPerBlock * blockColIdx
-wholeVector(offset until offset + colsPerBlock) := vec
+wholeVector(offset until Math.min(cols, offset + colsPerBlock)) := vec
   }
   new IndexedRow(rowIdx, Vectors.fromBreeze(wholeVector))
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/36110a83/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrixSuite.scala
--
diff --git 
a/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrixSuite.scala
 
b/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrixSuite.scala
index e5a2cbb..61266f3 100644
--- 
a/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrixSuite.scala
+++ 
b/mllib/src/test/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrixSuite.scala
@@ -135,6 +135,11 @@ class BlockMatrixSuite extends SparkFunSuite with 
MLlibTestSparkContext {
 assert(rowMat.numCols() === n)
 assert(rowMat.toBreeze() === gridBasedMat.toBreeze())
 
+// SPARK-15922: BlockMatrix to IndexedRowMatrix throws an error"
+val bmat = rowMat.toBlockMatrix
+val imat = bmat.toIndexedRowMatrix
+imat.rows.collect
+
 val rows = 1
 val cols = 10
 





spark git commit: [SPARK-15977][SQL] Fix TRUNCATE TABLE for Spark specific datasource tables

2016-06-16 Thread hvanhovell
Repository: spark
Updated Branches:
  refs/heads/branch-2.0 e11c27918 -> 5b003c9bc


[SPARK-15977][SQL] Fix TRUNCATE TABLE for Spark specific datasource tables

## What changes were proposed in this pull request?
`TRUNCATE TABLE` is currently broken for Spark-specific datasource tables (json, csv, ...). This PR correctly sets the location for these datasources, which allows them to be truncated.
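
A condensed sketch of the scenario this enables, adapted from the extended `DDLSuite` test shown below (assumes a `SparkSession` named `spark`):

```scala
import spark.implicits._

// A Spark-specific (json) datasource table; before this fix, TRUNCATE TABLE
// could not resolve its storage location and left the data in place.
val data = (1 to 10).map(i => (i, i)).toDF("width", "length")
data.write.format("json").saveAsTable("rectangles")

spark.sql("TRUNCATE TABLE rectangles")
assert(spark.table("rectangles").collect().isEmpty)  // holds after this patch
```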

## How was this patch tested?
Extended the datasources `TRUNCATE TABLE` tests in `DDLSuite`.

Author: Herman van Hovell 

Closes #13697 from hvanhovell/SPARK-15977.

(cherry picked from commit f9bf15d9bde4df2178f7a8f932c883bb77c46149)
Signed-off-by: Herman van Hovell 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5b003c9b
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5b003c9b
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5b003c9b

Branch: refs/heads/branch-2.0
Commit: 5b003c9bcf43709408ed8f68d17b249675f50fbc
Parents: e11c279
Author: Herman van Hovell 
Authored: Thu Jun 16 13:47:36 2016 -0700
Committer: Herman van Hovell 
Committed: Thu Jun 16 13:47:55 2016 -0700

--
 .../spark/sql/execution/command/tables.scala|  4 ++-
 .../spark/sql/execution/command/DDLSuite.scala  | 28 +---
 2 files changed, 21 insertions(+), 11 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/5b003c9b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
--
diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
index 58bb5cd..3eb93a2 100644
--- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
+++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
@@ -348,7 +348,9 @@ case class TruncateTableCommand(
 s"for tables that are not partitioned: '$tableName'")
 }
 val locations =
-  if (isDatasourceTable || table.partitionColumnNames.isEmpty) {
+  if (isDatasourceTable) {
+Seq(table.storage.serdeProperties.get("path"))
+  } else if (table.partitionColumnNames.isEmpty) {
 Seq(table.storage.locationUri)
   } else {
 catalog.listPartitions(tableName, 
partitionSpec).map(_.storage.locationUri)

http://git-wip-us.apache.org/repos/asf/spark/blob/5b003c9b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
--
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
index e15fcf4..7eb2fff 100644
--- 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
+++ 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
@@ -1280,17 +1280,25 @@ class DDLSuite extends QueryTest with SharedSQLContext 
with BeforeAndAfterEach {
   test("truncate table - datasource table") {
 import testImplicits._
 val data = (1 to 10).map { i => (i, i) }.toDF("width", "length")
-data.write.saveAsTable("rectangles")
-spark.catalog.cacheTable("rectangles")
-assume(spark.table("rectangles").collect().nonEmpty, "bad test; table was 
empty to begin with")
-assume(spark.catalog.isCached("rectangles"), "bad test; table was not 
cached to begin with")
-sql("TRUNCATE TABLE rectangles")
-assert(spark.table("rectangles").collect().isEmpty)
-assert(!spark.catalog.isCached("rectangles"))
+
+// Test both a Hive compatible and incompatible code path.
+Seq("json", "parquet").foreach { format =>
+  withTable("rectangles") {
+data.write.format(format).saveAsTable("rectangles")
+assume(spark.table("rectangles").collect().nonEmpty,
+  "bad test; table was empty to begin with")
+sql("TRUNCATE TABLE rectangles")
+assert(spark.table("rectangles").collect().isEmpty)
+  }
+}
+
 // truncating partitioned data source tables is not supported
-data.write.partitionBy("length").saveAsTable("rectangles2")
-assertUnsupported("TRUNCATE TABLE rectangles PARTITION (width=1)")
-assertUnsupported("TRUNCATE TABLE rectangles2 PARTITION (width=1)")
+withTable("rectangles", "rectangles2") {
+  data.write.saveAsTable("rectangles")
+  data.write.partitionBy("length").saveAsTable("rectangles2")
+  assertUnsupported("TRUNCATE TABLE rectangles PARTITION (width=1)")
+  assertUnsupported("TRUNCATE TABLE rectangles2 PARTITION (width=1)")
+}
   }
 
   test("truncate table - external table, temporary table, view (not allowed)") 
{


--

spark git commit: [SPARK-15977][SQL] Fix TRUNCATE TABLE for Spark specific datasource tables

2016-06-16 Thread hvanhovell
Repository: spark
Updated Branches:
  refs/heads/master 084dca770 -> f9bf15d9b


[SPARK-15977][SQL] Fix TRUNCATE TABLE for Spark specific datasource tables

## What changes were proposed in this pull request?
`TRUNCATE TABLE` is currently broken for Spark-specific datasource tables (json, csv, ...). This PR correctly sets the location for these datasources, which allows them to be truncated.

## How was this patch tested?
Extended the datasources `TRUNCATE TABLE` tests in `DDLSuite`.

Author: Herman van Hovell 

Closes #13697 from hvanhovell/SPARK-15977.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f9bf15d9
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f9bf15d9
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f9bf15d9

Branch: refs/heads/master
Commit: f9bf15d9bde4df2178f7a8f932c883bb77c46149
Parents: 084dca7
Author: Herman van Hovell 
Authored: Thu Jun 16 13:47:36 2016 -0700
Committer: Herman van Hovell 
Committed: Thu Jun 16 13:47:36 2016 -0700

--
 .../spark/sql/execution/command/tables.scala|  4 ++-
 .../spark/sql/execution/command/DDLSuite.scala  | 28 +---
 2 files changed, 21 insertions(+), 11 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/f9bf15d9/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
--
diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
index 58bb5cd..3eb93a2 100644
--- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
+++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
@@ -348,7 +348,9 @@ case class TruncateTableCommand(
 s"for tables that are not partitioned: '$tableName'")
 }
 val locations =
-  if (isDatasourceTable || table.partitionColumnNames.isEmpty) {
+  if (isDatasourceTable) {
+Seq(table.storage.serdeProperties.get("path"))
+  } else if (table.partitionColumnNames.isEmpty) {
 Seq(table.storage.locationUri)
   } else {
 catalog.listPartitions(tableName, 
partitionSpec).map(_.storage.locationUri)

http://git-wip-us.apache.org/repos/asf/spark/blob/f9bf15d9/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
--
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
index e15fcf4..7eb2fff 100644
--- 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
+++ 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala
@@ -1280,17 +1280,25 @@ class DDLSuite extends QueryTest with SharedSQLContext 
with BeforeAndAfterEach {
   test("truncate table - datasource table") {
 import testImplicits._
 val data = (1 to 10).map { i => (i, i) }.toDF("width", "length")
-data.write.saveAsTable("rectangles")
-spark.catalog.cacheTable("rectangles")
-assume(spark.table("rectangles").collect().nonEmpty, "bad test; table was 
empty to begin with")
-assume(spark.catalog.isCached("rectangles"), "bad test; table was not 
cached to begin with")
-sql("TRUNCATE TABLE rectangles")
-assert(spark.table("rectangles").collect().isEmpty)
-assert(!spark.catalog.isCached("rectangles"))
+
+// Test both a Hive compatible and incompatible code path.
+Seq("json", "parquet").foreach { format =>
+  withTable("rectangles") {
+data.write.format(format).saveAsTable("rectangles")
+assume(spark.table("rectangles").collect().nonEmpty,
+  "bad test; table was empty to begin with")
+sql("TRUNCATE TABLE rectangles")
+assert(spark.table("rectangles").collect().isEmpty)
+  }
+}
+
 // truncating partitioned data source tables is not supported
-data.write.partitionBy("length").saveAsTable("rectangles2")
-assertUnsupported("TRUNCATE TABLE rectangles PARTITION (width=1)")
-assertUnsupported("TRUNCATE TABLE rectangles2 PARTITION (width=1)")
+withTable("rectangles", "rectangles2") {
+  data.write.saveAsTable("rectangles")
+  data.write.partitionBy("length").saveAsTable("rectangles2")
+  assertUnsupported("TRUNCATE TABLE rectangles PARTITION (width=1)")
+  assertUnsupported("TRUNCATE TABLE rectangles2 PARTITION (width=1)")
+}
   }
 
   test("truncate table - external table, temporary table, view (not allowed)") 
{



spark git commit: [SPARK-15981][SQL][STREAMING] Fixed bug and added tests in DataStreamReader Python API

2016-06-16 Thread zsxwing
Repository: spark
Updated Branches:
  refs/heads/branch-2.0 0a2291cd1 -> e11c27918


[SPARK-15981][SQL][STREAMING] Fixed bug and added tests in DataStreamReader 
Python API

## What changes were proposed in this pull request?

- Fixed a bug in the Python API of DataStreamReader. Because a single path was being converted to an array before calling the Java DataStreamReader method (which takes only a string), it gave the following error (see the short Scala sketch after this list):
```
File "/Users/tdas/Projects/Spark/spark/python/pyspark/sql/readwriter.py", line 
947, in pyspark.sql.readwriter.DataStreamReader.json
Failed example:
json_sdf = spark.readStream.json(os.path.join(tempfile.mkdtemp(), 'data'),  
   schema = sdf_schema)
Exception raised:
Traceback (most recent call last):
  File 
"/System/Library/Frameworks/Python.framework/Versions/2.6/lib/python2.6/doctest.py",
 line 1253, in __run
compileflags, 1) in test.globs
  File "", line 1, 
in 
json_sdf = spark.readStream.json(os.path.join(tempfile.mkdtemp(), 
'data'), schema = sdf_schema)
  File "/Users/tdas/Projects/Spark/spark/python/pyspark/sql/readwriter.py", 
line 963, in json
return self._df(self._jreader.json(path))
  File 
"/Users/tdas/Projects/Spark/spark/python/lib/py4j-0.10.1-src.zip/py4j/java_gateway.py",
 line 933, in __call__
answer, self.gateway_client, self.target_id, self.name)
  File "/Users/tdas/Projects/Spark/spark/python/pyspark/sql/utils.py", line 
63, in deco
return f(*a, **kw)
  File 
"/Users/tdas/Projects/Spark/spark/python/lib/py4j-0.10.1-src.zip/py4j/protocol.py",
 line 316, in get_return_value
format(target_id, ".", name, value))
Py4JError: An error occurred while calling o121.json. Trace:
py4j.Py4JException: Method json([class java.util.ArrayList]) does not exist
at py4j.reflection.ReflectionEngine.getMethod(ReflectionEngine.java:318)
at py4j.reflection.ReflectionEngine.getMethod(ReflectionEngine.java:326)
at py4j.Gateway.invoke(Gateway.java:272)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:128)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:211)
at java.lang.Thread.run(Thread.java:744)
```

- Reduced code duplication between DataStreamReader and DataFrameWriter
- Added missing Python doctests
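
For context, a minimal Scala-side sketch of the JVM API the Python wrapper delegates to (assuming a `SparkSession` named `spark`; the schema and path are illustrative). `DataStreamReader.json` takes a single path string, which is why the Python side must not wrap the path in a list:

```scala
import org.apache.spark.sql.types._

val schema = new StructType().add("value", StringType)

// DataStreamReader.json(path: String) -- a single path, not a list of paths.
val streamingDF = spark.readStream
  .schema(schema)
  .json("/tmp/streaming-input")
```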

## How was this patch tested?
New tests

Author: Tathagata Das 

Closes #13703 from tdas/SPARK-15981.

(cherry picked from commit 084dca770f5c26f906e7555707c7894cf05fb86b)
Signed-off-by: Shixiong Zhu 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e11c2791
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e11c2791
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e11c2791

Branch: refs/heads/branch-2.0
Commit: e11c279188b34d410f6ecf17cb1773c95f24a19e
Parents: 0a2291c
Author: Tathagata Das 
Authored: Thu Jun 16 13:17:41 2016 -0700
Committer: Shixiong Zhu 
Committed: Thu Jun 16 13:17:50 2016 -0700

--
 python/pyspark/sql/readwriter.py | 258 ++
 1 file changed, 136 insertions(+), 122 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/e11c2791/python/pyspark/sql/readwriter.py
--
diff --git a/python/pyspark/sql/readwriter.py b/python/pyspark/sql/readwriter.py
index c982de6..72fd184 100644
--- a/python/pyspark/sql/readwriter.py
+++ b/python/pyspark/sql/readwriter.py
@@ -44,7 +44,82 @@ def to_str(value):
 return str(value)
 
 
-class DataFrameReader(object):
+class ReaderUtils(object):
+
+def _set_json_opts(self, schema, primitivesAsString, prefersDecimal,
+   allowComments, allowUnquotedFieldNames, 
allowSingleQuotes,
+   allowNumericLeadingZero, 
allowBackslashEscapingAnyCharacter,
+   mode, columnNameOfCorruptRecord):
+"""
+Set options based on the Json optional parameters
+"""
+if schema is not None:
+self.schema(schema)
+if primitivesAsString is not None:
+self.option("primitivesAsString", primitivesAsString)
+if prefersDecimal is not None:
+self.option("prefersDecimal", prefersDecimal)
+if allowComments is not None:
+self.option("allowComments", allowComments)
+if allowUnquotedFieldNames is not None:
+self.option("allowUnquotedFieldNames", allowUnquotedFieldNames)
+if allowSingleQuotes is not None:
+self.option("allowSingleQuotes", allowSingleQuotes)
+if allowNumericLeadingZero is not None:
+self.option("allowNumericLeadingZero", allowNumericLeadingZero)
+if allo

spark git commit: [SPARK-15981][SQL][STREAMING] Fixed bug and added tests in DataStreamReader Python API

2016-06-16 Thread zsxwing
Repository: spark
Updated Branches:
  refs/heads/master a865f6e05 -> 084dca770


[SPARK-15981][SQL][STREAMING] Fixed bug and added tests in DataStreamReader 
Python API

## What changes were proposed in this pull request?

- Fixed a bug in the Python API of DataStreamReader. Because a single path was being converted to an array before calling the Java DataStreamReader method (which takes only a string), it gave the following error:
```
File "/Users/tdas/Projects/Spark/spark/python/pyspark/sql/readwriter.py", line 
947, in pyspark.sql.readwriter.DataStreamReader.json
Failed example:
json_sdf = spark.readStream.json(os.path.join(tempfile.mkdtemp(), 'data'),  
   schema = sdf_schema)
Exception raised:
Traceback (most recent call last):
  File 
"/System/Library/Frameworks/Python.framework/Versions/2.6/lib/python2.6/doctest.py",
 line 1253, in __run
compileflags, 1) in test.globs
  File "", line 1, 
in 
json_sdf = spark.readStream.json(os.path.join(tempfile.mkdtemp(), 
'data'), schema = sdf_schema)
  File "/Users/tdas/Projects/Spark/spark/python/pyspark/sql/readwriter.py", 
line 963, in json
return self._df(self._jreader.json(path))
  File 
"/Users/tdas/Projects/Spark/spark/python/lib/py4j-0.10.1-src.zip/py4j/java_gateway.py",
 line 933, in __call__
answer, self.gateway_client, self.target_id, self.name)
  File "/Users/tdas/Projects/Spark/spark/python/pyspark/sql/utils.py", line 
63, in deco
return f(*a, **kw)
  File 
"/Users/tdas/Projects/Spark/spark/python/lib/py4j-0.10.1-src.zip/py4j/protocol.py",
 line 316, in get_return_value
format(target_id, ".", name, value))
Py4JError: An error occurred while calling o121.json. Trace:
py4j.Py4JException: Method json([class java.util.ArrayList]) does not exist
at py4j.reflection.ReflectionEngine.getMethod(ReflectionEngine.java:318)
at py4j.reflection.ReflectionEngine.getMethod(ReflectionEngine.java:326)
at py4j.Gateway.invoke(Gateway.java:272)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:128)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:211)
at java.lang.Thread.run(Thread.java:744)
```

- Reduced code duplication between DataStreamReader and DataFrameWriter
- Added missing Python doctests

## How was this patch tested?
New tests

Author: Tathagata Das 

Closes #13703 from tdas/SPARK-15981.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/084dca77
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/084dca77
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/084dca77

Branch: refs/heads/master
Commit: 084dca770f5c26f906e7555707c7894cf05fb86b
Parents: a865f6e
Author: Tathagata Das 
Authored: Thu Jun 16 13:17:41 2016 -0700
Committer: Shixiong Zhu 
Committed: Thu Jun 16 13:17:41 2016 -0700

--
 python/pyspark/sql/readwriter.py | 258 ++
 1 file changed, 136 insertions(+), 122 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/084dca77/python/pyspark/sql/readwriter.py
--
diff --git a/python/pyspark/sql/readwriter.py b/python/pyspark/sql/readwriter.py
index c982de6..72fd184 100644
--- a/python/pyspark/sql/readwriter.py
+++ b/python/pyspark/sql/readwriter.py
@@ -44,7 +44,82 @@ def to_str(value):
 return str(value)
 
 
-class DataFrameReader(object):
+class ReaderUtils(object):
+
+def _set_json_opts(self, schema, primitivesAsString, prefersDecimal,
+   allowComments, allowUnquotedFieldNames, 
allowSingleQuotes,
+   allowNumericLeadingZero, 
allowBackslashEscapingAnyCharacter,
+   mode, columnNameOfCorruptRecord):
+"""
+Set options based on the Json optional parameters
+"""
+if schema is not None:
+self.schema(schema)
+if primitivesAsString is not None:
+self.option("primitivesAsString", primitivesAsString)
+if prefersDecimal is not None:
+self.option("prefersDecimal", prefersDecimal)
+if allowComments is not None:
+self.option("allowComments", allowComments)
+if allowUnquotedFieldNames is not None:
+self.option("allowUnquotedFieldNames", allowUnquotedFieldNames)
+if allowSingleQuotes is not None:
+self.option("allowSingleQuotes", allowSingleQuotes)
+if allowNumericLeadingZero is not None:
+self.option("allowNumericLeadingZero", allowNumericLeadingZero)
+if allowBackslashEscapingAnyCharacter is not None:
+self.option("allowBackslashEscapingAnyCharacter", 

spark git commit: [SPARK-15996][R] Fix R examples by removing deprecated functions

2016-06-16 Thread shivaram
Repository: spark
Updated Branches:
  refs/heads/branch-2.0 c53eda03a -> 0a2291cd1


[SPARK-15996][R] Fix R examples by removing deprecated functions

## What changes were proposed in this pull request?

Currently, the R examples (`dataframe.R` and `data-manipulation.R`) fail as shown below. We had better update them before releasing the 2.0 RC. This PR updates them to use up-to-date APIs.

```bash
$ bin/spark-submit examples/src/main/r/dataframe.R
...
Warning message:
'createDataFrame(sqlContext...)' is deprecated.
Use 'createDataFrame(data, schema = NULL, samplingRatio = 1.0)' instead.
See help("Deprecated")
...
Warning message:
'read.json(sqlContext...)' is deprecated.
Use 'read.json(path)' instead.
See help("Deprecated")
...
Error: could not find function "registerTempTable"
Execution halted
```

## How was this patch tested?

Manual.
```
curl -LO http://s3-us-west-2.amazonaws.com/sparkr-data/flights.csv
bin/spark-submit examples/src/main/r/dataframe.R
bin/spark-submit examples/src/main/r/data-manipulation.R flights.csv
```

Author: Dongjoon Hyun 

Closes #13714 from dongjoon-hyun/SPARK-15996.

(cherry picked from commit a865f6e05297f6121bb2fde717860f9edeed263e)
Signed-off-by: Shivaram Venkataraman 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0a2291cd
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0a2291cd
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0a2291cd

Branch: refs/heads/branch-2.0
Commit: 0a2291cd15751018f1680e92aa8f63be4546e7a7
Parents: c53eda0
Author: Dongjoon Hyun 
Authored: Thu Jun 16 12:46:25 2016 -0700
Committer: Shivaram Venkataraman 
Committed: Thu Jun 16 12:46:32 2016 -0700

--
 examples/src/main/r/data-manipulation.R |  8 
 examples/src/main/r/dataframe.R | 11 +++
 2 files changed, 11 insertions(+), 8 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/0a2291cd/examples/src/main/r/data-manipulation.R
--
diff --git a/examples/src/main/r/data-manipulation.R 
b/examples/src/main/r/data-manipulation.R
index 58a3013..badb98b 100644
--- a/examples/src/main/r/data-manipulation.R
+++ b/examples/src/main/r/data-manipulation.R
@@ -49,10 +49,10 @@ flights_df$date <- as.Date(flights_df$date)
 SFO_df <- flights_df[flights_df$dest == "SFO", ] 
 
 # Convert the local data frame into a SparkDataFrame
-SFO_DF <- createDataFrame(sqlContext, SFO_df)
+SFO_DF <- createDataFrame(SFO_df)
 
 #  Directly create a SparkDataFrame from the source data
-flightsDF <- read.df(sqlContext, flightsCsvPath, source = "csv", header = 
"true")
+flightsDF <- read.df(flightsCsvPath, source = "csv", header = "true")
 
 # Print the schema of this SparkDataFrame
 printSchema(flightsDF)
@@ -75,8 +75,8 @@ destDF <- select(flightsDF, "dest", "cancelled")
 
 # Using SQL to select columns of data
 # First, register the flights SparkDataFrame as a table
-registerTempTable(flightsDF, "flightsTable")
-destDF <- sql(sqlContext, "SELECT dest, cancelled FROM flightsTable")
+createOrReplaceTempView(flightsDF, "flightsTable")
+destDF <- sql("SELECT dest, cancelled FROM flightsTable")
 
 # Use collect to create a local R data frame
 local_df <- collect(destDF)

http://git-wip-us.apache.org/repos/asf/spark/blob/0a2291cd/examples/src/main/r/dataframe.R
--
diff --git a/examples/src/main/r/dataframe.R b/examples/src/main/r/dataframe.R
index 436bac6..0434705 100644
--- a/examples/src/main/r/dataframe.R
+++ b/examples/src/main/r/dataframe.R
@@ -25,7 +25,7 @@ sqlContext <- sparkRSQL.init(sc)
 localDF <- data.frame(name=c("John", "Smith", "Sarah"), age=c(19, 23, 18))
 
 # Convert local data frame to a SparkDataFrame
-df <- createDataFrame(sqlContext, localDF)
+df <- createDataFrame(localDF)
 
 # Print its schema
 printSchema(df)
@@ -35,14 +35,17 @@ printSchema(df)
 
 # Create a DataFrame from a JSON file
 path <- file.path(Sys.getenv("SPARK_HOME"), 
"examples/src/main/resources/people.json")
-peopleDF <- read.json(sqlContext, path)
+peopleDF <- read.json(path)
 printSchema(peopleDF)
+# root
+#  |-- age: long (nullable = true)
+#  |-- name: string (nullable = true)
 
 # Register this DataFrame as a table.
-registerTempTable(peopleDF, "people")
+createOrReplaceTempView(peopleDF, "people")
 
 # SQL statements can be run by using the sql methods provided by sqlContext
-teenagers <- sql(sqlContext, "SELECT name FROM people WHERE age >= 13 AND age 
<= 19")
+teenagers <- sql("SELECT name FROM people WHERE age >= 13 AND age <= 19")
 
 # Call collect to get a local data.frame
 teenagersLocalDF <- collect(teenagers)



spark git commit: [SPARK-15996][R] Fix R examples by removing deprecated functions

2016-06-16 Thread shivaram
Repository: spark
Updated Branches:
  refs/heads/master 9ea0d5e32 -> a865f6e05


[SPARK-15996][R] Fix R examples by removing deprecated functions

## What changes were proposed in this pull request?

Currently, the R examples (`dataframe.R` and `data-manipulation.R`) fail as shown below. We had better update them before releasing the 2.0 RC. This PR updates them to use up-to-date APIs.

```bash
$ bin/spark-submit examples/src/main/r/dataframe.R
...
Warning message:
'createDataFrame(sqlContext...)' is deprecated.
Use 'createDataFrame(data, schema = NULL, samplingRatio = 1.0)' instead.
See help("Deprecated")
...
Warning message:
'read.json(sqlContext...)' is deprecated.
Use 'read.json(path)' instead.
See help("Deprecated")
...
Error: could not find function "registerTempTable"
Execution halted
```

## How was this patch tested?

Manual.
```
curl -LO http://s3-us-west-2.amazonaws.com/sparkr-data/flights.csv
bin/spark-submit examples/src/main/r/dataframe.R
bin/spark-submit examples/src/main/r/data-manipulation.R flights.csv
```

Author: Dongjoon Hyun 

Closes #13714 from dongjoon-hyun/SPARK-15996.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a865f6e0
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a865f6e0
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a865f6e0

Branch: refs/heads/master
Commit: a865f6e05297f6121bb2fde717860f9edeed263e
Parents: 9ea0d5e
Author: Dongjoon Hyun 
Authored: Thu Jun 16 12:46:25 2016 -0700
Committer: Shivaram Venkataraman 
Committed: Thu Jun 16 12:46:25 2016 -0700

--
 examples/src/main/r/data-manipulation.R |  8 
 examples/src/main/r/dataframe.R | 11 +++
 2 files changed, 11 insertions(+), 8 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/a865f6e0/examples/src/main/r/data-manipulation.R
--
diff --git a/examples/src/main/r/data-manipulation.R 
b/examples/src/main/r/data-manipulation.R
index 58a3013..badb98b 100644
--- a/examples/src/main/r/data-manipulation.R
+++ b/examples/src/main/r/data-manipulation.R
@@ -49,10 +49,10 @@ flights_df$date <- as.Date(flights_df$date)
 SFO_df <- flights_df[flights_df$dest == "SFO", ] 
 
 # Convert the local data frame into a SparkDataFrame
-SFO_DF <- createDataFrame(sqlContext, SFO_df)
+SFO_DF <- createDataFrame(SFO_df)
 
 #  Directly create a SparkDataFrame from the source data
-flightsDF <- read.df(sqlContext, flightsCsvPath, source = "csv", header = 
"true")
+flightsDF <- read.df(flightsCsvPath, source = "csv", header = "true")
 
 # Print the schema of this SparkDataFrame
 printSchema(flightsDF)
@@ -75,8 +75,8 @@ destDF <- select(flightsDF, "dest", "cancelled")
 
 # Using SQL to select columns of data
 # First, register the flights SparkDataFrame as a table
-registerTempTable(flightsDF, "flightsTable")
-destDF <- sql(sqlContext, "SELECT dest, cancelled FROM flightsTable")
+createOrReplaceTempView(flightsDF, "flightsTable")
+destDF <- sql("SELECT dest, cancelled FROM flightsTable")
 
 # Use collect to create a local R data frame
 local_df <- collect(destDF)

http://git-wip-us.apache.org/repos/asf/spark/blob/a865f6e0/examples/src/main/r/dataframe.R
--
diff --git a/examples/src/main/r/dataframe.R b/examples/src/main/r/dataframe.R
index 436bac6..0434705 100644
--- a/examples/src/main/r/dataframe.R
+++ b/examples/src/main/r/dataframe.R
@@ -25,7 +25,7 @@ sqlContext <- sparkRSQL.init(sc)
 localDF <- data.frame(name=c("John", "Smith", "Sarah"), age=c(19, 23, 18))
 
 # Convert local data frame to a SparkDataFrame
-df <- createDataFrame(sqlContext, localDF)
+df <- createDataFrame(localDF)
 
 # Print its schema
 printSchema(df)
@@ -35,14 +35,17 @@ printSchema(df)
 
 # Create a DataFrame from a JSON file
 path <- file.path(Sys.getenv("SPARK_HOME"), 
"examples/src/main/resources/people.json")
-peopleDF <- read.json(sqlContext, path)
+peopleDF <- read.json(path)
 printSchema(peopleDF)
+# root
+#  |-- age: long (nullable = true)
+#  |-- name: string (nullable = true)
 
 # Register this DataFrame as a table.
-registerTempTable(peopleDF, "people")
+createOrReplaceTempView(peopleDF, "people")
 
 # SQL statements can be run by using the sql methods provided by sqlContext
-teenagers <- sql(sqlContext, "SELECT name FROM people WHERE age >= 13 AND age 
<= 19")
+teenagers <- sql("SELECT name FROM people WHERE age >= 13 AND age <= 19")
 
 # Call collect to get a local data.frame
 teenagersLocalDF <- collect(teenagers)





spark git commit: [SPARK-15983][SQL] Removes FileFormat.prepareRead

2016-06-16 Thread wenchen
Repository: spark
Updated Branches:
  refs/heads/branch-2.0 26359d27c -> c53eda03a


[SPARK-15983][SQL] Removes FileFormat.prepareRead

## What changes were proposed in this pull request?

Interface method `FileFormat.prepareRead()` was added in #12088 to handle a 
special case in the LibSVM data source.

However, the semantics of this interface method aren't intuitive: it returns a modified version of the data source options map. Considering that the LibSVM case can be easily handled using schema metadata inside `inferSchema`, we can remove this interface method to keep the `FileFormat` interface clean.
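
A small standalone sketch of the schema-metadata pattern referred to above (simplified field types; not the LibSVM code itself): stash the inferred value on the field inside `inferSchema`, then read it back in the reader instead of threading it through the options map.

```scala
import org.apache.spark.sql.types._

// Producer side (inside inferSchema): attach the inferred feature count.
val featuresMetadata = new MetadataBuilder()
  .putLong("numFeatures", 100L)
  .build()

val schema = StructType(
  StructField("label", DoubleType, nullable = false) ::
  StructField("features", StringType, nullable = false, featuresMetadata) :: Nil)

// Consumer side (inside buildReader): no options map needed.
val numFeatures = schema("features").metadata.getLong("numFeatures").toInt
assert(numFeatures > 0)
```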

## How was this patch tested?

Existing tests.

Author: Cheng Lian 

Closes #13698 from liancheng/remove-prepare-read.

(cherry picked from commit 9ea0d5e326e08b914aa46f1eec8795688a61bf74)
Signed-off-by: Wenchen Fan 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c53eda03
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c53eda03
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c53eda03

Branch: refs/heads/branch-2.0
Commit: c53eda03a282fb0569dd7e0dae3785999d022c8f
Parents: 26359d2
Author: Cheng Lian 
Authored: Thu Jun 16 10:24:29 2016 -0700
Committer: Wenchen Fan 
Committed: Thu Jun 16 10:24:38 2016 -0700

--
 .../spark/ml/source/libsvm/LibSVMRelation.scala | 33 ++--
 .../sql/execution/datasources/DataSource.scala  |  5 +--
 .../datasources/fileSourceInterfaces.scala  | 11 ---
 3 files changed, 18 insertions(+), 31 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/c53eda03/mllib/src/main/scala/org/apache/spark/ml/source/libsvm/LibSVMRelation.scala
--
diff --git 
a/mllib/src/main/scala/org/apache/spark/ml/source/libsvm/LibSVMRelation.scala 
b/mllib/src/main/scala/org/apache/spark/ml/source/libsvm/LibSVMRelation.scala
index 62e09d2..4988dd6 100644
--- 
a/mllib/src/main/scala/org/apache/spark/ml/source/libsvm/LibSVMRelation.scala
+++ 
b/mllib/src/main/scala/org/apache/spark/ml/source/libsvm/LibSVMRelation.scala
@@ -120,9 +120,12 @@ class LibSVMFileFormat extends TextBasedFileFormat with 
DataSourceRegister {
   override def toString: String = "LibSVM"
 
   private def verifySchema(dataSchema: StructType): Unit = {
-if (dataSchema.size != 2 ||
-  (!dataSchema(0).dataType.sameType(DataTypes.DoubleType)
-|| !dataSchema(1).dataType.sameType(new VectorUDT( {
+if (
+  dataSchema.size != 2 ||
+!dataSchema(0).dataType.sameType(DataTypes.DoubleType) ||
+!dataSchema(1).dataType.sameType(new VectorUDT()) ||
+!(dataSchema(1).metadata.getLong("numFeatures").toInt > 0)
+) {
   throw new IOException(s"Illegal schema for libsvm data, 
schema=$dataSchema")
 }
   }
@@ -131,17 +134,8 @@ class LibSVMFileFormat extends TextBasedFileFormat with 
DataSourceRegister {
   sparkSession: SparkSession,
   options: Map[String, String],
   files: Seq[FileStatus]): Option[StructType] = {
-Some(
-  StructType(
-StructField("label", DoubleType, nullable = false) ::
-StructField("features", new VectorUDT(), nullable = false) :: Nil))
-  }
-
-  override def prepareRead(
-  sparkSession: SparkSession,
-  options: Map[String, String],
-  files: Seq[FileStatus]): Map[String, String] = {
-val numFeatures = options.get("numFeatures").filter(_.toInt > 0).getOrElse 
{
+val numFeatures: Int = options.get("numFeatures").map(_.toInt).filter(_ > 
0).getOrElse {
+  // Infers number of features if the user doesn't specify (a valid) one.
   val dataFiles = files.filterNot(_.getPath.getName startsWith "_")
   val path = if (dataFiles.length == 1) {
 dataFiles.head.getPath.toUri.toString
@@ -156,7 +150,14 @@ class LibSVMFileFormat extends TextBasedFileFormat with 
DataSourceRegister {
   MLUtils.computeNumFeatures(parsed)
 }
 
-new CaseInsensitiveMap(options + ("numFeatures" -> numFeatures.toString))
+val featuresMetadata = new MetadataBuilder()
+  .putLong("numFeatures", numFeatures)
+  .build()
+
+Some(
+  StructType(
+StructField("label", DoubleType, nullable = false) ::
+StructField("features", new VectorUDT(), nullable = false, 
featuresMetadata) :: Nil))
   }
 
   override def prepareWrite(
@@ -185,7 +186,7 @@ class LibSVMFileFormat extends TextBasedFileFormat with 
DataSourceRegister {
   options: Map[String, String],
   hadoopConf: Configuration): (PartitionedFile) => Iterator[InternalRow] = 
{
 verifySchema(dataSchema)
-val numFeatures = options("numFeatures").toInt
+val numFeatures = 
dataSchema("features").metadata.getLong("numFeatures").toInt
 assert(numFeatures > 0)
 
 val sparse = options.getOrElse("vec

spark git commit: [SPARK-15983][SQL] Removes FileFormat.prepareRead

2016-06-16 Thread wenchen
Repository: spark
Updated Branches:
  refs/heads/master 6451cf927 -> 9ea0d5e32


[SPARK-15983][SQL] Removes FileFormat.prepareRead

## What changes were proposed in this pull request?

Interface method `FileFormat.prepareRead()` was added in #12088 to handle a 
special case in the LibSVM data source.

However, the semantics of this interface method aren't intuitive: it returns a modified version of the data source options map. Considering that the LibSVM case can be easily handled using schema metadata inside `inferSchema`, we can remove this interface method to keep the `FileFormat` interface clean.

## How was this patch tested?

Existing tests.

Author: Cheng Lian 

Closes #13698 from liancheng/remove-prepare-read.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/9ea0d5e3
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/9ea0d5e3
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/9ea0d5e3

Branch: refs/heads/master
Commit: 9ea0d5e326e08b914aa46f1eec8795688a61bf74
Parents: 6451cf9
Author: Cheng Lian 
Authored: Thu Jun 16 10:24:29 2016 -0700
Committer: Wenchen Fan 
Committed: Thu Jun 16 10:24:29 2016 -0700

--
 .../spark/ml/source/libsvm/LibSVMRelation.scala | 33 ++--
 .../sql/execution/datasources/DataSource.scala  |  5 +--
 .../datasources/fileSourceInterfaces.scala  | 11 ---
 3 files changed, 18 insertions(+), 31 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/9ea0d5e3/mllib/src/main/scala/org/apache/spark/ml/source/libsvm/LibSVMRelation.scala
--
diff --git 
a/mllib/src/main/scala/org/apache/spark/ml/source/libsvm/LibSVMRelation.scala 
b/mllib/src/main/scala/org/apache/spark/ml/source/libsvm/LibSVMRelation.scala
index 62e09d2..4988dd6 100644
--- 
a/mllib/src/main/scala/org/apache/spark/ml/source/libsvm/LibSVMRelation.scala
+++ 
b/mllib/src/main/scala/org/apache/spark/ml/source/libsvm/LibSVMRelation.scala
@@ -120,9 +120,12 @@ class LibSVMFileFormat extends TextBasedFileFormat with 
DataSourceRegister {
   override def toString: String = "LibSVM"
 
   private def verifySchema(dataSchema: StructType): Unit = {
-if (dataSchema.size != 2 ||
-  (!dataSchema(0).dataType.sameType(DataTypes.DoubleType)
-|| !dataSchema(1).dataType.sameType(new VectorUDT( {
+if (
+  dataSchema.size != 2 ||
+!dataSchema(0).dataType.sameType(DataTypes.DoubleType) ||
+!dataSchema(1).dataType.sameType(new VectorUDT()) ||
+!(dataSchema(1).metadata.getLong("numFeatures").toInt > 0)
+) {
   throw new IOException(s"Illegal schema for libsvm data, 
schema=$dataSchema")
 }
   }
@@ -131,17 +134,8 @@ class LibSVMFileFormat extends TextBasedFileFormat with 
DataSourceRegister {
   sparkSession: SparkSession,
   options: Map[String, String],
   files: Seq[FileStatus]): Option[StructType] = {
-Some(
-  StructType(
-StructField("label", DoubleType, nullable = false) ::
-StructField("features", new VectorUDT(), nullable = false) :: Nil))
-  }
-
-  override def prepareRead(
-  sparkSession: SparkSession,
-  options: Map[String, String],
-  files: Seq[FileStatus]): Map[String, String] = {
-val numFeatures = options.get("numFeatures").filter(_.toInt > 0).getOrElse 
{
+val numFeatures: Int = options.get("numFeatures").map(_.toInt).filter(_ > 
0).getOrElse {
+  // Infers number of features if the user doesn't specify (a valid) one.
   val dataFiles = files.filterNot(_.getPath.getName startsWith "_")
   val path = if (dataFiles.length == 1) {
 dataFiles.head.getPath.toUri.toString
@@ -156,7 +150,14 @@ class LibSVMFileFormat extends TextBasedFileFormat with 
DataSourceRegister {
   MLUtils.computeNumFeatures(parsed)
 }
 
-new CaseInsensitiveMap(options + ("numFeatures" -> numFeatures.toString))
+val featuresMetadata = new MetadataBuilder()
+  .putLong("numFeatures", numFeatures)
+  .build()
+
+Some(
+  StructType(
+StructField("label", DoubleType, nullable = false) ::
+StructField("features", new VectorUDT(), nullable = false, 
featuresMetadata) :: Nil))
   }
 
   override def prepareWrite(
@@ -185,7 +186,7 @@ class LibSVMFileFormat extends TextBasedFileFormat with 
DataSourceRegister {
   options: Map[String, String],
   hadoopConf: Configuration): (PartitionedFile) => Iterator[InternalRow] = 
{
 verifySchema(dataSchema)
-val numFeatures = options("numFeatures").toInt
+val numFeatures = 
dataSchema("features").metadata.getLong("numFeatures").toInt
 assert(numFeatures > 0)
 
 val sparse = options.getOrElse("vectorType", "sparse") == "sparse"

http://git-wip-us.apache.org/repos/asf/spark/blob/9ea0d5e3/sql/core/src/m

spark git commit: [SPARK-15862][SQL] Better Error Message When Having Database Name in CACHE TABLE AS SELECT

2016-06-16 Thread lian
Repository: spark
Updated Branches:
  refs/heads/branch-2.0 52cb1ad38 -> 26359d27c


[SPARK-15862][SQL] Better Error Message When Having Database Name in CACHE 
TABLE AS SELECT

 What changes were proposed in this pull request?
~~If the temp table already exists, we should not silently replace it when 
doing `CACHE TABLE AS SELECT`. This is inconsistent with the behavior of `CREATE VIEW` or `CREATE TABLE`. This PR is to fix this silent drop.~~

~~Maybe, we also can introduce new syntax for replacing the existing one. For 
example, in Hive, to replace a view, the syntax should be like `ALTER VIEW AS 
SELECT` or `CREATE OR REPLACE VIEW AS SELECT`~~

The table name in `CACHE TABLE AS SELECT` should NOT contain a database prefix like "database.table". Thus, this PR captures this case in the parser and outputs a better error message, instead of reporting that the view already exists.

In addition, this PR refactors the parser to generate table identifiers instead of returning the table name as a string.
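
A hedged sketch of the user-facing change (table and database names are made up; assumes a `SparkSession` named `spark` and an existing table `src`):

```scala
// Unqualified name: works as before.
spark.sql("CACHE TABLE memo AS SELECT * FROM src")

// Database-qualified name: now rejected at parse time with a message that
// explains the database prefix is not allowed, instead of a confusing
// "view already exists" style error.
spark.sql("CACHE TABLE somedb.memo AS SELECT * FROM src")  // throws ParseException
```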

 How was this patch tested?
- Added a test case for caching and uncaching qualified table names
- Fixed a few test cases that do not drop temp table at the end
- Added the related test case for the issue resolved in this PR

Author: gatorsmile 
Author: xiaoli 
Author: Xiao Li 

Closes #13572 from gatorsmile/cacheTableAsSelect.

(cherry picked from commit 6451cf9270b55465d8ecea4c4031329a1058561a)
Signed-off-by: Cheng Lian 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/26359d27
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/26359d27
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/26359d27

Branch: refs/heads/branch-2.0
Commit: 26359d27c47ae3ec53e442de3884ec9245d15cee
Parents: 52cb1ad
Author: gatorsmile 
Authored: Thu Jun 16 10:01:59 2016 -0700
Committer: Cheng Lian 
Committed: Thu Jun 16 10:02:12 2016 -0700

--
 .../apache/spark/sql/catalyst/parser/SqlBase.g4 |  4 +-
 .../spark/sql/execution/SparkSqlParser.scala| 10 ++-
 .../spark/sql/execution/command/cache.scala | 20 ++---
 .../spark/sql/execution/command/views.scala |  2 +-
 .../org/apache/spark/sql/CachedTableSuite.scala | 68 +
 .../apache/spark/sql/hive/test/TestHive.scala   |  2 +-
 .../spark/sql/hive/CachedTableSuite.scala   | 79 +++-
 7 files changed, 121 insertions(+), 64 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/26359d27/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
--
diff --git 
a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 
b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
index 044f910..b603196 100644
--- 
a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
+++ 
b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
@@ -114,8 +114,8 @@ statement
 tableIdentifier partitionSpec? describeColName?
#describeTable
 | REFRESH TABLE tableIdentifier
#refreshTable
 | REFRESH .*?  
#refreshResource
-| CACHE LAZY? TABLE identifier (AS? query)?
#cacheTable
-| UNCACHE TABLE identifier 
#uncacheTable
+| CACHE LAZY? TABLE tableIdentifier (AS? query)?   
#cacheTable
+| UNCACHE TABLE tableIdentifier
#uncacheTable
 | CLEAR CACHE  
#clearCache
 | LOAD DATA LOCAL? INPATH path=STRING OVERWRITE? INTO TABLE
 tableIdentifier partitionSpec? 
#loadData

http://git-wip-us.apache.org/repos/asf/spark/blob/26359d27/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
--
diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
index a0508ad..154c25a 100644
--- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
+++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
@@ -221,14 +221,20 @@ class SparkSqlAstBuilder(conf: SQLConf) extends 
AstBuilder {
*/
   override def visitCacheTable(ctx: CacheTableContext): LogicalPlan = 
withOrigin(ctx) {
 val query = Option(ctx.query).map(plan)
-CacheTableCommand(ctx.identifier.getText, query, ctx.LAZY != null)
+val tableIdent = visitTableIdentifier(ctx.tableIdentifier)
+if (query.isDefined && tableIdent.database.isDefined) {
+  val d

spark git commit: [SPARK-15862][SQL] Better Error Message When Having Database Name in CACHE TABLE AS SELECT

2016-06-16 Thread lian
Repository: spark
Updated Branches:
  refs/heads/master 7c6c69263 -> 6451cf927


[SPARK-15862][SQL] Better Error Message When Having Database Name in CACHE 
TABLE AS SELECT

 What changes were proposed in this pull request?
~~If the temp table already exists, we should not silently replace it when 
doing `CACHE TABLE AS SELECT`. This is inconsistent with the behavior of `CREATE VIEW` or `CREATE TABLE`. This PR is to fix this silent drop.~~

~~Maybe, we also can introduce new syntax for replacing the existing one. For 
example, in Hive, to replace a view, the syntax should be like `ALTER VIEW AS 
SELECT` or `CREATE OR REPLACE VIEW AS SELECT`~~

The table name in `CACHE TABLE AS SELECT` should NOT contain a database prefix like "database.table". Thus, this PR captures this case in the parser and outputs a better error message, instead of reporting that the view already exists.

In addition, this PR refactors the parser to generate table identifiers instead of returning the table name as a string.

 How was this patch tested?
- Added a test case for caching and uncaching qualified table names
- Fixed a few test cases that do not drop temp table at the end
- Added the related test case for the issue resolved in this PR

Author: gatorsmile 
Author: xiaoli 
Author: Xiao Li 

Closes #13572 from gatorsmile/cacheTableAsSelect.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/6451cf92
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/6451cf92
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/6451cf92

Branch: refs/heads/master
Commit: 6451cf9270b55465d8ecea4c4031329a1058561a
Parents: 7c6c692
Author: gatorsmile 
Authored: Thu Jun 16 10:01:59 2016 -0700
Committer: Cheng Lian 
Committed: Thu Jun 16 10:01:59 2016 -0700

--
 .../apache/spark/sql/catalyst/parser/SqlBase.g4 |  4 +-
 .../spark/sql/execution/SparkSqlParser.scala| 10 ++-
 .../spark/sql/execution/command/cache.scala | 20 ++---
 .../spark/sql/execution/command/views.scala |  2 +-
 .../org/apache/spark/sql/CachedTableSuite.scala | 68 +
 .../apache/spark/sql/hive/test/TestHive.scala   |  2 +-
 .../spark/sql/hive/CachedTableSuite.scala   | 79 +++-
 7 files changed, 121 insertions(+), 64 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/6451cf92/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
--
diff --git 
a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 
b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
index 044f910..b603196 100644
--- 
a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
+++ 
b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
@@ -114,8 +114,8 @@ statement
 tableIdentifier partitionSpec? describeColName?
#describeTable
 | REFRESH TABLE tableIdentifier
#refreshTable
 | REFRESH .*?  
#refreshResource
-| CACHE LAZY? TABLE identifier (AS? query)?
#cacheTable
-| UNCACHE TABLE identifier 
#uncacheTable
+| CACHE LAZY? TABLE tableIdentifier (AS? query)?   
#cacheTable
+| UNCACHE TABLE tableIdentifier
#uncacheTable
 | CLEAR CACHE  
#clearCache
 | LOAD DATA LOCAL? INPATH path=STRING OVERWRITE? INTO TABLE
 tableIdentifier partitionSpec? 
#loadData

http://git-wip-us.apache.org/repos/asf/spark/blob/6451cf92/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
--
diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
index a0508ad..154c25a 100644
--- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
+++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala
@@ -221,14 +221,20 @@ class SparkSqlAstBuilder(conf: SQLConf) extends 
AstBuilder {
*/
   override def visitCacheTable(ctx: CacheTableContext): LogicalPlan = 
withOrigin(ctx) {
 val query = Option(ctx.query).map(plan)
-CacheTableCommand(ctx.identifier.getText, query, ctx.LAZY != null)
+val tableIdent = visitTableIdentifier(ctx.tableIdentifier)
+if (query.isDefined && tableIdent.database.isDefined) {
+  val database = tableIdent.database.get
+  throw new ParseException(s"It is not allowed to add database pre

[2/3] spark git commit: [SPARK-15979][SQL] Rename various Parquet support classes (branch-2.0).

2016-06-16 Thread rxin
http://git-wip-us.apache.org/repos/asf/spark/blob/52cb1ad3/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/CatalystWriteSupport.scala
--
diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/CatalystWriteSupport.scala
 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/CatalystWriteSupport.scala
deleted file mode 100644
index 67bfd39..000
--- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/CatalystWriteSupport.scala
+++ /dev/null
@@ -1,436 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.execution.datasources.parquet
-
-import java.nio.{ByteBuffer, ByteOrder}
-import java.util
-
-import scala.collection.JavaConverters.mapAsJavaMapConverter
-
-import org.apache.hadoop.conf.Configuration
-import org.apache.parquet.column.ParquetProperties
-import org.apache.parquet.hadoop.ParquetOutputFormat
-import org.apache.parquet.hadoop.api.WriteSupport
-import org.apache.parquet.hadoop.api.WriteSupport.WriteContext
-import org.apache.parquet.io.api.{Binary, RecordConsumer}
-
-import org.apache.spark.internal.Logging
-import org.apache.spark.sql.catalyst.InternalRow
-import org.apache.spark.sql.catalyst.expressions.SpecializedGetters
-import org.apache.spark.sql.catalyst.util.DateTimeUtils
-import 
org.apache.spark.sql.execution.datasources.parquet.CatalystSchemaConverter.minBytesForPrecision
-import org.apache.spark.sql.internal.SQLConf
-import org.apache.spark.sql.types._
-
-/**
- * A Parquet [[WriteSupport]] implementation that writes Catalyst 
[[InternalRow]]s as Parquet
- * messages.  This class can write Parquet data in two modes:
- *
- *  - Standard mode: Parquet data are written in standard format defined in 
parquet-format spec.
- *  - Legacy mode: Parquet data are written in legacy format compatible with 
Spark 1.4 and prior.
- *
- * This behavior can be controlled by SQL option 
`spark.sql.parquet.writeLegacyFormat`.  The value
- * of this option is propagated to this class by the `init()` method and its 
Hadoop configuration
- * argument.
- */
-private[parquet] class CatalystWriteSupport extends WriteSupport[InternalRow] 
with Logging {
-  // A `ValueWriter` is responsible for writing a field of an `InternalRow` to 
the record consumer.
-  // Here we are using `SpecializedGetters` rather than `InternalRow` so that 
we can directly access
-  // data in `ArrayData` without the help of `SpecificMutableRow`.
-  private type ValueWriter = (SpecializedGetters, Int) => Unit
-
-  // Schema of the `InternalRow`s to be written
-  private var schema: StructType = _
-
-  // `ValueWriter`s for all fields of the schema
-  private var rootFieldWriters: Seq[ValueWriter] = _
-
-  // The Parquet `RecordConsumer` to which all `InternalRow`s are written
-  private var recordConsumer: RecordConsumer = _
-
-  // Whether to write data in legacy Parquet format compatible with Spark 1.4 
and prior versions
-  private var writeLegacyParquetFormat: Boolean = _
-
-  // Reusable byte array used to write timestamps as Parquet INT96 values
-  private val timestampBuffer = new Array[Byte](12)
-
-  // Reusable byte array used to write decimal values
-  private val decimalBuffer = new 
Array[Byte](minBytesForPrecision(DecimalType.MAX_PRECISION))
-
-  override def init(configuration: Configuration): WriteContext = {
-val schemaString = configuration.get(CatalystWriteSupport.SPARK_ROW_SCHEMA)
-this.schema = StructType.fromString(schemaString)
-this.writeLegacyParquetFormat = {
-  // `SQLConf.PARQUET_WRITE_LEGACY_FORMAT` should always be explicitly set 
in ParquetRelation
-  assert(configuration.get(SQLConf.PARQUET_WRITE_LEGACY_FORMAT.key) != 
null)
-  configuration.get(SQLConf.PARQUET_WRITE_LEGACY_FORMAT.key).toBoolean
-}
-this.rootFieldWriters = schema.map(_.dataType).map(makeWriter)
-
-val messageType = new 
CatalystSchemaConverter(configuration).convert(schema)
-val metadata = Map(CatalystReadSupport.SPARK_METADATA_KEY -> 
schemaString).asJava
-
-logInfo(
-  s"""Initialized Parquet WriteSupport with Catalyst schem

[1/3] spark git commit: [SPARK-15979][SQL] Rename various Parquet support classes (branch-2.0).

2016-06-16 Thread rxin
Repository: spark
Updated Branches:
  refs/heads/branch-2.0 35c0a60a6 -> 52cb1ad38


http://git-wip-us.apache.org/repos/asf/spark/blob/52cb1ad3/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaConverter.scala
--
diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaConverter.scala
 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaConverter.scala
new file mode 100644
index 000..1ac083f
--- /dev/null
+++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaConverter.scala
@@ -0,0 +1,579 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.datasources.parquet
+
+import scala.collection.JavaConverters._
+
+import org.apache.hadoop.conf.Configuration
+import org.apache.parquet.schema._
+import org.apache.parquet.schema.OriginalType._
+import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName._
+import org.apache.parquet.schema.Type.Repetition._
+
+import org.apache.spark.sql.AnalysisException
+import 
org.apache.spark.sql.execution.datasources.parquet.ParquetSchemaConverter.maxPrecisionForBytes
+import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.types._
+
+/**
+ * This converter class is used to convert Parquet [[MessageType]] to Spark 
SQL [[StructType]] and
+ * vice versa.
+ *
+ * Parquet format backwards-compatibility rules are respected when converting 
Parquet
+ * [[MessageType]] schemas.
+ *
+ * @see https://github.com/apache/parquet-format/blob/master/LogicalTypes.md
+ * @constructor
+ * @param assumeBinaryIsString Whether unannotated BINARY fields should be 
assumed to be Spark SQL
+ *[[StringType]] fields when converting Parquet a [[MessageType]] to 
Spark SQL
+ *[[StructType]].  This argument only affects Parquet read path.
+ * @param assumeInt96IsTimestamp Whether unannotated INT96 fields should be 
assumed to be Spark SQL
+ *[[TimestampType]] fields when converting Parquet a [[MessageType]] 
to Spark SQL
+ *[[StructType]].  Note that Spark SQL [[TimestampType]] is similar to 
Hive timestamp, which
+ *has optional nanosecond precision, but different from `TIME_MILLS` 
and `TIMESTAMP_MILLIS`
+ *described in Parquet format spec.  This argument only affects 
Parquet read path.
+ * @param writeLegacyParquetFormat Whether to use legacy Parquet format 
compatible with Spark 1.4
+ *and prior versions when converting a Catalyst [[StructType]] to a 
Parquet [[MessageType]].
+ *When set to false, use standard format defined in parquet-format 
spec.  This argument only
+ *affects Parquet write path.
+ */
+private[parquet] class ParquetSchemaConverter(
+assumeBinaryIsString: Boolean = SQLConf.PARQUET_BINARY_AS_STRING.defaultValue.get,
+assumeInt96IsTimestamp: Boolean = SQLConf.PARQUET_INT96_AS_TIMESTAMP.defaultValue.get,
+writeLegacyParquetFormat: Boolean = SQLConf.PARQUET_WRITE_LEGACY_FORMAT.defaultValue.get) {
+
+  def this(conf: SQLConf) = this(
+assumeBinaryIsString = conf.isParquetBinaryAsString,
+assumeInt96IsTimestamp = conf.isParquetINT96AsTimestamp,
+writeLegacyParquetFormat = conf.writeLegacyParquetFormat)
+
+  def this(conf: Configuration) = this(
+assumeBinaryIsString = conf.get(SQLConf.PARQUET_BINARY_AS_STRING.key).toBoolean,
+assumeInt96IsTimestamp = conf.get(SQLConf.PARQUET_INT96_AS_TIMESTAMP.key).toBoolean,
+writeLegacyParquetFormat = conf.get(SQLConf.PARQUET_WRITE_LEGACY_FORMAT.key,
+  SQLConf.PARQUET_WRITE_LEGACY_FORMAT.defaultValue.get.toString).toBoolean)
+
+  /**
+   * Converts Parquet [[MessageType]] `parquetSchema` to a Spark SQL [[StructType]].
+   */
+  def convert(parquetSchema: MessageType): StructType = convert(parquetSchema.asGroupType())
+
+  private def convert(parquetSchema: GroupType): StructType = {
+val fields = parquetSchema.getFields.asScala.map { field =>
+  field.getRepetition match {
+case OPTIONAL =>
+  StructField(field.getName, convertField(field), nullable = true)
+
+   
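The hunk above shows the read-path entry point: `convert` walks the Parquet group type and maps OPTIONAL fields to nullable `StructField`s. A minimal sketch of how that conversion could be exercised follows; it is illustrative only: the schema string and object name are made up, and since `ParquetSchemaConverter` is `private[parquet]` the sketch assumes it lives in that package with spark-sql and parquet-column on the classpath.

package org.apache.spark.sql.execution.datasources.parquet

import org.apache.parquet.schema.MessageTypeParser

// Illustrative only: exercises the read-path conversion shown in the diff above.
object ParquetSchemaConverterSketch {
  def main(args: Array[String]): Unit = {
    // A small Parquet schema; the field names are made up for illustration.
    val parquetSchema = MessageTypeParser.parseMessageType(
      """message spark_schema {
        |  optional int32 id;
        |  optional binary name (UTF8);
        |}""".stripMargin)

    // Defaults mirror the SQLConf defaults in the primary constructor above.
    val converter = new ParquetSchemaConverter()

    // OPTIONAL Parquet fields come back as nullable StructFields, per convertField above.
    val sparkSchema = converter.convert(parquetSchema)
    sparkSchema.printTreeString()
  }
}

Note that `writeLegacyParquetFormat` has no effect on this read-path call; per the Scaladoc above it only changes how a Catalyst schema is converted back to a Parquet schema on the write path.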

[3/3] spark git commit: [SPARK-15979][SQL] Rename various Parquet support classes (branch-2.0).

2016-06-16 Thread rxin
[SPARK-15979][SQL] Rename various Parquet support classes (branch-2.0).

## What changes were proposed in this pull request?
This patch renames various Parquet support classes from CatalystAbc to 
ParquetAbc. This new naming makes more sense for two reasons:

1. These are not optimizer related (i.e. Catalyst) classes.
2. We are in the Spark code base, and as a result it'd be clearer to call out 
that these are Parquet support classes, rather than generic Spark classes.

## How was this patch tested?
Renamed test cases as well.

Author: Reynold Xin 

Closes #13700 from rxin/parquet-rename-branch-2.0.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/52cb1ad3
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/52cb1ad3
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/52cb1ad3

Branch: refs/heads/branch-2.0
Commit: 52cb1ad38f669dca3f276f38a3f75d57d973e982
Parents: 35c0a60
Author: Reynold Xin 
Authored: Thu Jun 16 00:21:08 2016 -0700
Committer: Reynold Xin 
Committed: Thu Jun 16 00:21:08 2016 -0700

--
 .../spark/ml/source/libsvm/LibSVMRelation.scala |   6 +-
 .../SpecificParquetRecordReaderBase.java|   4 +-
 .../parquet/VectorizedColumnReader.java |  12 +-
 .../parquet/CatalystReadSupport.scala   | 302 -
 .../parquet/CatalystRecordMaterializer.scala|  41 --
 .../parquet/CatalystRowConverter.scala  | 672 ---
 .../parquet/CatalystSchemaConverter.scala   | 579 
 .../parquet/CatalystWriteSupport.scala  | 436 
 .../datasources/parquet/ParquetFileFormat.scala |  44 +-
 .../parquet/ParquetReadSupport.scala| 302 +
 .../parquet/ParquetRecordMaterializer.scala |  41 ++
 .../parquet/ParquetRowConverter.scala   | 672 +++
 .../parquet/ParquetSchemaConverter.scala| 579 
 .../parquet/ParquetWriteSupport.scala   | 436 
 .../datasources/parquet/ParquetIOSuite.scala|   4 +-
 .../datasources/parquet/ParquetQuerySuite.scala |   6 +-
 .../parquet/ParquetSchemaSuite.scala|   6 +-
 .../datasources/parquet/ParquetTest.scala   |   4 +-
 18 files changed, 2071 insertions(+), 2075 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/52cb1ad3/mllib/src/main/scala/org/apache/spark/ml/source/libsvm/LibSVMRelation.scala
--
diff --git a/mllib/src/main/scala/org/apache/spark/ml/source/libsvm/LibSVMRelation.scala b/mllib/src/main/scala/org/apache/spark/ml/source/libsvm/LibSVMRelation.scala
index b5b2a68..62e09d2 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/source/libsvm/LibSVMRelation.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/source/libsvm/LibSVMRelation.scala
@@ -141,7 +141,7 @@ class LibSVMFileFormat extends TextBasedFileFormat with DataSourceRegister {
   sparkSession: SparkSession,
   options: Map[String, String],
   files: Seq[FileStatus]): Map[String, String] = {
-def computeNumFeatures(): Int = {
+val numFeatures = options.get("numFeatures").filter(_.toInt > 0).getOrElse {
   val dataFiles = files.filterNot(_.getPath.getName startsWith "_")
   val path = if (dataFiles.length == 1) {
 dataFiles.head.getPath.toUri.toString
@@ -156,10 +156,6 @@ class LibSVMFileFormat extends TextBasedFileFormat with DataSourceRegister {
   MLUtils.computeNumFeatures(parsed)
 }
 
-val numFeatures = options.get("numFeatures").filter(_.toInt > 0).getOrElse {
-  computeNumFeatures()
-}
-
 new CaseInsensitiveMap(options + ("numFeatures" -> numFeatures.toString))
   }
 

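The hunk above folds the old `computeNumFeatures()` helper into a single expression: the `numFeatures` option is honoured when it parses to a positive value, and the expensive scan runs only inside `getOrElse`. A minimal, self-contained sketch of that Option pattern follows; the `expensiveScan` helper and its values are made up for illustration and stand in for the `MLUtils.computeNumFeatures` call in the diff, and the sketch coerces the result to `Int`, whereas the diff keeps the option value as a string and later calls `toString` on the result.

object NumFeaturesFallbackSketch {
  // Hypothetical stand-in for the data-file scan done in the diff via MLUtils.computeNumFeatures.
  def expensiveScan(): Int = {
    println("scanning data files to infer numFeatures...")
    42
  }

  def main(args: Array[String]): Unit = {
    // "0" does not pass the filter, so the fallback runs; try "10" to see it short-circuit.
    val options = Map("numFeatures" -> "0")

    val numFeatures: Int = options.get("numFeatures")
      .filter(_.toInt > 0)          // drop absent or non-positive values
      .map(_.toInt)
      .getOrElse(expensiveScan())   // by-name argument: evaluated only when needed

    println(s"numFeatures = $numFeatures")
  }
}
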
http://git-wip-us.apache.org/repos/asf/spark/blob/52cb1ad3/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/SpecificParquetRecordReaderBase.java
--
diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/SpecificParquetRecordReaderBase.java b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/SpecificParquetRecordReaderBase.java
index cbe8f78..1a25679 100644
--- a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/SpecificParquetRecordReaderBase.java
+++ b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/SpecificParquetRecordReaderBase.java
@@ -136,7 +136,7 @@ public abstract class SpecificParquetRecordReaderBase<T> extends RecordReader<Void, T> {

http://git-wip-us.apache.org/repos/asf/spark/blob/52cb1ad3/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/VectorizedColumnReader.java
--
diff --git a/sql/core/src/main