spark git commit: [SPARK-11899][SQL] API audit for GroupedDataset.

2015-11-21 Thread rxin
Repository: spark
Updated Branches:
  refs/heads/master 596710268 -> ff442bbcf


[SPARK-11899][SQL] API audit for GroupedDataset.

1. Renamed map to mapGroup, flatMap to flatMapGroup.
2. Renamed asKey -> keyAs.
3. Added more documentation.
4. Changed type parameter T to V on GroupedDataset.
5. Added since versions for all functions.

Author: Reynold Xin 

Closes #9880 from rxin/SPARK-11899.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ff442bbc
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ff442bbc
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ff442bbc

Branch: refs/heads/master
Commit: ff442bbcffd4f93cfcc2f76d160011e725d2fb3f
Parents: 5967102
Author: Reynold Xin 
Authored: Sat Nov 21 15:00:37 2015 -0800
Committer: Reynold Xin 
Committed: Sat Nov 21 15:00:37 2015 -0800

--
 .../api/java/function/MapGroupFunction.java |   2 +-
 .../scala/org/apache/spark/sql/Encoder.scala|   4 +
 .../spark/sql/catalyst/JavaTypeInference.scala  |   3 +-
 .../scala/org/apache/spark/sql/Column.scala |   2 +
 .../scala/org/apache/spark/sql/DataFrame.scala  |   1 -
 .../org/apache/spark/sql/GroupedDataset.scala   | 132 +++
 .../org/apache/spark/sql/JavaDatasetSuite.java  |   8 +-
 .../spark/sql/DatasetPrimitiveSuite.scala   |   4 +-
 .../org/apache/spark/sql/DatasetSuite.scala |  20 +--
 9 files changed, 131 insertions(+), 45 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/ff442bbc/core/src/main/java/org/apache/spark/api/java/function/MapGroupFunction.java
--
diff --git a/core/src/main/java/org/apache/spark/api/java/function/MapGroupFunction.java b/core/src/main/java/org/apache/spark/api/java/function/MapGroupFunction.java
index 2935f99..4f3f222 100644
--- a/core/src/main/java/org/apache/spark/api/java/function/MapGroupFunction.java
+++ b/core/src/main/java/org/apache/spark/api/java/function/MapGroupFunction.java
@@ -21,7 +21,7 @@ import java.io.Serializable;
 import java.util.Iterator;
 
 /**
- * Base interface for a map function used in GroupedDataset's map function.
+ * Base interface for a map function used in GroupedDataset's mapGroup function.
  */
 public interface MapGroupFunction<K, V, R> extends Serializable {
   R call(K key, Iterator<V> values) throws Exception;

http://git-wip-us.apache.org/repos/asf/spark/blob/ff442bbc/sql/catalyst/src/main/scala/org/apache/spark/sql/Encoder.scala
--
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/Encoder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/Encoder.scala
index 5cb8edf..03aa25e 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/Encoder.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/Encoder.scala
@@ -30,6 +30,8 @@ import org.apache.spark.sql.types._
  *
  * Encoders are not intended to be thread-safe and thus they are allow to avoid internal locking
  * and reuse internal buffers to improve performance.
+ *
+ * @since 1.6.0
  */
 trait Encoder[T] extends Serializable {
 
@@ -42,6 +44,8 @@ trait Encoder[T] extends Serializable {
 
 /**
  * Methods for creating encoders.
+ *
+ * @since 1.6.0
  */
 object Encoders {
 

http://git-wip-us.apache.org/repos/asf/spark/blob/ff442bbc/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala
--
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala
index 88a457f..7d4cfbe 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala
@@ -29,7 +29,7 @@ import org.apache.spark.sql.types._
 /**
  * Type-inference utilities for POJOs and Java collections.
  */
-private [sql] object JavaTypeInference {
+object JavaTypeInference {
 
   private val iterableType = TypeToken.of(classOf[JIterable[_]])
   private val mapType = TypeToken.of(classOf[JMap[_, _]])
@@ -53,7 +53,6 @@ private [sql] object JavaTypeInference {
* @return (SQL data type, nullable)
*/
   private def inferDataType(typeToken: TypeToken[_]): (DataType, Boolean) = {
-// TODO: All of this could probably be moved to Catalyst as it is mostly not Spark specific.
 typeToken.getRawType match {
   case c: Class[_] if c.isAnnotationPresent(classOf[SQLUserDefinedType]) =>
 (c.getAnnotation(classOf[SQLUserDefinedType]).udt().newInstance(), true)

http://git-wip-us.apache.org/repos/asf/spark/blob/ff442bbc/sql/core/src/main/scala/org/apache/spark/sql/Column.scala

spark git commit: [SPARK-11899][SQL] API audit for GroupedDataset.

2015-11-21 Thread rxin
Repository: spark
Updated Branches:
  refs/heads/branch-1.6 05547183b -> 8c718a577


[SPARK-11899][SQL] API audit for GroupedDataset.

1. Renamed map to mapGroup, flatMap to flatMapGroup.
2. Renamed asKey -> keyAs.
3. Added more documentation.
4. Changed type parameter T to V on GroupedDataset.
5. Added since versions for all functions.

Author: Reynold Xin 

Closes #9880 from rxin/SPARK-11899.

(cherry picked from commit ff442bbcffd4f93cfcc2f76d160011e725d2fb3f)
Signed-off-by: Reynold Xin 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/8c718a57
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/8c718a57
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/8c718a57

Branch: refs/heads/branch-1.6
Commit: 8c718a577e32d9f91dc4cacd58dab894e366d93d
Parents: 0554718
Author: Reynold Xin 
Authored: Sat Nov 21 15:00:37 2015 -0800
Committer: Reynold Xin 
Committed: Sat Nov 21 15:00:47 2015 -0800

--
 .../api/java/function/MapGroupFunction.java |   2 +-
 .../scala/org/apache/spark/sql/Encoder.scala|   4 +
 .../spark/sql/catalyst/JavaTypeInference.scala  |   3 +-
 .../scala/org/apache/spark/sql/Column.scala |   2 +
 .../scala/org/apache/spark/sql/DataFrame.scala  |   1 -
 .../org/apache/spark/sql/GroupedDataset.scala   | 132 +++
 .../org/apache/spark/sql/JavaDatasetSuite.java  |   8 +-
 .../spark/sql/DatasetPrimitiveSuite.scala   |   4 +-
 .../org/apache/spark/sql/DatasetSuite.scala |  20 +--
 9 files changed, 131 insertions(+), 45 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/8c718a57/core/src/main/java/org/apache/spark/api/java/function/MapGroupFunction.java
--
diff --git a/core/src/main/java/org/apache/spark/api/java/function/MapGroupFunction.java b/core/src/main/java/org/apache/spark/api/java/function/MapGroupFunction.java
index 2935f99..4f3f222 100644
--- a/core/src/main/java/org/apache/spark/api/java/function/MapGroupFunction.java
+++ b/core/src/main/java/org/apache/spark/api/java/function/MapGroupFunction.java
@@ -21,7 +21,7 @@ import java.io.Serializable;
 import java.util.Iterator;
 
 /**
- * Base interface for a map function used in GroupedDataset's map function.
+ * Base interface for a map function used in GroupedDataset's mapGroup function.
  */
 public interface MapGroupFunction<K, V, R> extends Serializable {
   R call(K key, Iterator<V> values) throws Exception;

http://git-wip-us.apache.org/repos/asf/spark/blob/8c718a57/sql/catalyst/src/main/scala/org/apache/spark/sql/Encoder.scala
--
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/Encoder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/Encoder.scala
index 5cb8edf..03aa25e 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/Encoder.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/Encoder.scala
@@ -30,6 +30,8 @@ import org.apache.spark.sql.types._
  *
  * Encoders are not intended to be thread-safe and thus they are allow to avoid internal locking
  * and reuse internal buffers to improve performance.
+ *
+ * @since 1.6.0
  */
 trait Encoder[T] extends Serializable {
 
@@ -42,6 +44,8 @@ trait Encoder[T] extends Serializable {
 
 /**
  * Methods for creating encoders.
+ *
+ * @since 1.6.0
  */
 object Encoders {
 

http://git-wip-us.apache.org/repos/asf/spark/blob/8c718a57/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala
--
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala
index 88a457f..7d4cfbe 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala
@@ -29,7 +29,7 @@ import org.apache.spark.sql.types._
 /**
  * Type-inference utilities for POJOs and Java collections.
  */
-private [sql] object JavaTypeInference {
+object JavaTypeInference {
 
   private val iterableType = TypeToken.of(classOf[JIterable[_]])
   private val mapType = TypeToken.of(classOf[JMap[_, _]])
@@ -53,7 +53,6 @@ private [sql] object JavaTypeInference {
* @return (SQL data type, nullable)
*/
   private def inferDataType(typeToken: TypeToken[_]): (DataType, Boolean) = {
-// TODO: All of this could probably be moved to Catalyst as it is mostly not Spark specific.
 typeToken.getRawType match {
   case c: Class[_] if c.isAnnotationPresent(classOf[SQLUserDefinedType]) =>
 (c.getAnnotation(classOf[SQLUserDefinedType]).udt().newInstance(), true)

http://git-wip-