spark git commit: [SPARK-16278][SPARK-16279][SQL] Implement map_keys/map_values SQL functions

2016-07-07 Thread rxin
Repository: spark
Updated Branches:
  refs/heads/branch-2.0 88603bd4f -> 7ef1d1c61


[SPARK-16278][SPARK-16279][SQL] Implement map_keys/map_values SQL functions

This PR adds `map_keys` and `map_values` SQL functions in order to remove the Hive fallback.

Passes the Jenkins tests, including new test cases.

Author: Dongjoon Hyun 

Closes #13967 from dongjoon-hyun/SPARK-16278.

(cherry picked from commit 54b27c1797fcd32b3f3e9d44e1a149ae396a61e6)
Signed-off-by: Reynold Xin 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7ef1d1c6
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7ef1d1c6
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7ef1d1c6

Branch: refs/heads/branch-2.0
Commit: 7ef1d1c618100313dbbdb6f615d9f87ff67e895d
Parents: 88603bd
Author: Dongjoon Hyun 
Authored: Sun Jul 3 16:59:40 2016 +0800
Committer: Reynold Xin 
Committed: Thu Jul 7 21:02:50 2016 -0700

--
 .../catalyst/analysis/FunctionRegistry.scala|  2 +
 .../expressions/collectionOperations.scala  | 48 
 .../expressions/CollectionFunctionsSuite.scala  | 13 ++
 .../spark/sql/DataFrameFunctionsSuite.scala | 16 +++
 .../spark/sql/hive/HiveSessionCatalog.scala |  1 -
 5 files changed, 79 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/7ef1d1c6/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
--
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
index 95be0d6..27c3a09 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
@@ -170,6 +170,8 @@ object FunctionRegistry {
 expression[IsNotNull]("isnotnull"),
 expression[Least]("least"),
 expression[CreateMap]("map"),
+expression[MapKeys]("map_keys"),
+expression[MapValues]("map_values"),
 expression[CreateNamedStruct]("named_struct"),
 expression[NaNvl]("nanvl"),
 expression[NullIf]("nullif"),

http://git-wip-us.apache.org/repos/asf/spark/blob/7ef1d1c6/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
--
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
index c71cb73..2e8ea11 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
@@ -44,6 +44,54 @@ case class Size(child: Expression) extends UnaryExpression with ExpectsInputType
 }
 
 /**
+ * Returns an unordered array containing the keys of the map.
+ */
+@ExpressionDescription(
+  usage = "_FUNC_(map) - Returns an unordered array containing the keys of the map.",
+  extended = " > SELECT _FUNC_(map(1, 'a', 2, 'b'));\n [1,2]")
+case class MapKeys(child: Expression)
+  extends UnaryExpression with ExpectsInputTypes {
+
+  override def inputTypes: Seq[AbstractDataType] = Seq(MapType)
+
+  override def dataType: DataType = ArrayType(child.dataType.asInstanceOf[MapType].keyType)
+
+  override def nullSafeEval(map: Any): Any = {
+map.asInstanceOf[MapData].keyArray()
+  }
+
+  override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
+nullSafeCodeGen(ctx, ev, c => s"${ev.value} = ($c).keyArray();")
+  }
+
+  override def prettyName: String = "map_keys"
+}
+
+/**
+ * Returns an unordered array containing the values of the map.
+ */
+@ExpressionDescription(
+  usage = "_FUNC_(map) - Returns an unordered array containing the values of the map.",
+  extended = " > SELECT _FUNC_(map(1, 'a', 2, 'b'));\n [\"a\",\"b\"]")
+case class MapValues(child: Expression)
+  extends UnaryExpression with ExpectsInputTypes {
+
+  override def inputTypes: Seq[AbstractDataType] = Seq(MapType)
+
+  override def dataType: DataType = ArrayType(child.dataType.asInstanceOf[MapType].valueType)
+
+  override def nullSafeEval(map: Any): Any = {
+map.asInstanceOf[MapData].valueArray()
+  }
+
+  override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
+nullSafeCodeGen(ctx, ev, c => s"${ev.value} = ($c).valueArray();")
+  }
+
+  override def prettyName: String = "map_values"
+}
+
+/**
  * Sorts the input array in 

spark git commit: [SPARK-16278][SPARK-16279][SQL] Implement map_keys/map_values SQL functions

2016-07-03 Thread wenchen
Repository: spark
Updated Branches:
  refs/heads/master ea990f969 -> 54b27c179


[SPARK-16278][SPARK-16279][SQL] Implement map_keys/map_values SQL functions

## What changes were proposed in this pull request?

This PR adds `map_keys` and `map_values` SQL functions in order to remove the Hive fallback.

## How was this patch tested?

Passes the Jenkins tests, including new test cases.

Author: Dongjoon Hyun 

Closes #13967 from dongjoon-hyun/SPARK-16278.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/54b27c17
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/54b27c17
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/54b27c17

Branch: refs/heads/master
Commit: 54b27c1797fcd32b3f3e9d44e1a149ae396a61e6
Parents: ea990f9
Author: Dongjoon Hyun 
Authored: Sun Jul 3 16:59:40 2016 +0800
Committer: Wenchen Fan 
Committed: Sun Jul 3 16:59:40 2016 +0800

--
 .../catalyst/analysis/FunctionRegistry.scala|  2 +
 .../expressions/collectionOperations.scala  | 48 
 .../expressions/CollectionFunctionsSuite.scala  | 13 ++
 .../spark/sql/DataFrameFunctionsSuite.scala | 16 +++
 .../spark/sql/hive/HiveSessionCatalog.scala |  1 -
 5 files changed, 79 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/54b27c17/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
--
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
index 26b0c30..e7f335f 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
@@ -171,6 +171,8 @@ object FunctionRegistry {
 expression[IsNotNull]("isnotnull"),
 expression[Least]("least"),
 expression[CreateMap]("map"),
+expression[MapKeys]("map_keys"),
+expression[MapValues]("map_values"),
 expression[CreateNamedStruct]("named_struct"),
 expression[NaNvl]("nanvl"),
 expression[NullIf]("nullif"),

http://git-wip-us.apache.org/repos/asf/spark/blob/54b27c17/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
--
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
index c71cb73..2e8ea11 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
@@ -44,6 +44,54 @@ case class Size(child: Expression) extends UnaryExpression with ExpectsInputType
 }
 
 /**
+ * Returns an unordered array containing the keys of the map.
+ */
+@ExpressionDescription(
+  usage = "_FUNC_(map) - Returns an unordered array containing the keys of the map.",
+  extended = " > SELECT _FUNC_(map(1, 'a', 2, 'b'));\n [1,2]")
+case class MapKeys(child: Expression)
+  extends UnaryExpression with ExpectsInputTypes {
+
+  override def inputTypes: Seq[AbstractDataType] = Seq(MapType)
+
+  override def dataType: DataType = ArrayType(child.dataType.asInstanceOf[MapType].keyType)
+
+  override def nullSafeEval(map: Any): Any = {
+map.asInstanceOf[MapData].keyArray()
+  }
+
+  override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
+nullSafeCodeGen(ctx, ev, c => s"${ev.value} = ($c).keyArray();")
+  }
+
+  override def prettyName: String = "map_keys"
+}
+
+/**
+ * Returns an unordered array containing the values of the map.
+ */
+@ExpressionDescription(
+  usage = "_FUNC_(map) - Returns an unordered array containing the values of the map.",
+  extended = " > SELECT _FUNC_(map(1, 'a', 2, 'b'));\n [\"a\",\"b\"]")
+case class MapValues(child: Expression)
+  extends UnaryExpression with ExpectsInputTypes {
+
+  override def inputTypes: Seq[AbstractDataType] = Seq(MapType)
+
+  override def dataType: DataType = ArrayType(child.dataType.asInstanceOf[MapType].valueType)
+
+  override def nullSafeEval(map: Any): Any = {
+map.asInstanceOf[MapData].valueArray()
+  }
+
+  override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
+nullSafeCodeGen(ctx, ev, c => s"${ev.value} = ($c).valueArray();")
+  }
+
+  override def prettyName: String = "map_values"
+}
+
+/**
  * Sorts the input array in ascending / descending order according to