git commit: [SPARK-1977][MLLIB] register mutable BitSet in MovieLensALS

2014-07-07 Thread meng
Repository: spark
Updated Branches:
  refs/heads/branch-1.0 b459aa77f -> 5044ba60a


[SPARK-1977][MLLIB] register mutable BitSet in MovieLensALS

Author: Neville Li nevi...@spotify.com

Closes #1319 from nevillelyh/gh/SPARK-1977 and squashes the following commits:

1f0a355 [Neville Li] [SPARK-1977][MLLIB] register mutable BitSet in MovieLensALS

(cherry picked from commit f7ce1b3b48f0354434456241188c6a5d954852e2)
Signed-off-by: Xiangrui Meng m...@databricks.com


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5044ba60
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5044ba60
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5044ba60

Branch: refs/heads/branch-1.0
Commit: 5044ba60a92495124b97ee97b87a45ce46d6073e
Parents: b459aa7
Author: Neville Li nevi...@spotify.com
Authored: Mon Jul 7 15:06:14 2014 -0700
Committer: Xiangrui Meng m...@databricks.com
Committed: Mon Jul 7 15:08:10 2014 -0700

--
 .../main/scala/org/apache/spark/examples/mllib/MovieLensALS.scala | 3 +++
 1 file changed, 3 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/5044ba60/examples/src/main/scala/org/apache/spark/examples/mllib/MovieLensALS.scala
--
diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/MovieLensALS.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/MovieLensALS.scala
index 6eb41e7..7a5a4cc 100644
--- a/examples/src/main/scala/org/apache/spark/examples/mllib/MovieLensALS.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/mllib/MovieLensALS.scala
@@ -17,6 +17,8 @@
 
 package org.apache.spark.examples.mllib
 
+import scala.collection.mutable
+
 import com.esotericsoftware.kryo.Kryo
 import org.apache.log4j.{Level, Logger}
 import scopt.OptionParser
@@ -41,6 +43,7 @@ object MovieLensALS {
   class ALSRegistrator extends KryoRegistrator {
     override def registerClasses(kryo: Kryo) {
       kryo.register(classOf[Rating])
+      kryo.register(classOf[mutable.BitSet])
     }
   }
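
For context, registering `mutable.BitSet` only has an effect when the job 
actually runs with Kryo. Below is a minimal sketch, assuming Spark 1.0-era 
configuration keys, of the driver-side setup that activates the registrator 
above; the `MovieLensALS.ALSRegistrator` reference mirrors how the nested 
class resolves, but treat the exact wiring as illustrative:

```
import org.apache.spark.SparkConf
import org.apache.spark.examples.mllib.MovieLensALS

// Sketch: switch the job to Kryo and point it at the registrator defined
// above. Without these two settings the kryo.register calls never run and
// mutable.BitSet falls back to the (slower) default serialization path.
val conf = new SparkConf()
  .setAppName("MovieLensALS")
  .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
  .set("spark.kryo.registrator", classOf[MovieLensALS.ALSRegistrator].getName)
```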
 



svn commit: r1608626 - in /spark: downloads.md site/downloads.html

2014-07-07 Thread rxin
Author: rxin
Date: Mon Jul  7 22:47:19 2014
New Revision: 1608626

URL: http://svn.apache.org/r1608626
Log:
Fix 1.0.0 release notes link.

Modified:
spark/downloads.md
spark/site/downloads.html

Modified: spark/downloads.md
URL: 
http://svn.apache.org/viewvc/spark/downloads.md?rev=1608626&r1=1608625&r2=1608626&view=diff
==
--- spark/downloads.md (original)
+++ spark/downloads.md Mon Jul  7 22:47:19 2014
@@ -52,7 +52,7 @@ If you are interested in working with th
 Once you've downloaded Spark, you can find instructions for installing and building it on the <a href="{{site.url}}documentation.html">documentation page</a>.
 
 <h3 id="all-releases">All Releases</h3>
-* [Spark 1.0.0](http://d3kbcqa49mib13.cloudfront.net/spark-1.0.0.tgz) (May 30, 2014) [(release notes)]({{site.url}}releases/spark-release-1.0.0.html) (prebuilt: [Hadoop1 [HDP1, CDH3]](http://d3kbcqa49mib13.cloudfront.net/spark-1.0.0-bin-hadoop1.tgz), [CDH4](http://d3kbcqa49mib13.cloudfront.net/spark-1.0.0-bin-cdh4.tgz), [Hadoop2 [HDP2, CDH5]](http://d3kbcqa49mib13.cloudfront.net/spark-1.0.0-bin-hadoop2.tgz))
+* [Spark 1.0.0](http://d3kbcqa49mib13.cloudfront.net/spark-1.0.0.tgz) (May 30, 2014) [(release notes)]({{site.url}}releases/spark-release-1-0-0.html) (prebuilt: [Hadoop1 [HDP1, CDH3]](http://d3kbcqa49mib13.cloudfront.net/spark-1.0.0-bin-hadoop1.tgz), [CDH4](http://d3kbcqa49mib13.cloudfront.net/spark-1.0.0-bin-cdh4.tgz), [Hadoop2 [HDP2, CDH5]](http://d3kbcqa49mib13.cloudfront.net/spark-1.0.0-bin-hadoop2.tgz))
 * [Spark 0.9.1](http://d3kbcqa49mib13.cloudfront.net/spark-0.9.1.tgz) (Apr 9, 2014) [(release notes)]({{site.url}}releases/spark-release-0-9-1.html) (prebuilt: [Hadoop1 [HDP1, CDH3]](http://d3kbcqa49mib13.cloudfront.net/spark-0.9.1-bin-hadoop1.tgz), [CDH4](http://d3kbcqa49mib13.cloudfront.net/spark-0.9.1-bin-cdh4.tgz), [Hadoop2 [HDP2, CDH5]](http://d3kbcqa49mib13.cloudfront.net/spark-0.9.1-bin-hadoop2.tgz))
 * [Spark 0.9.0](http://d3kbcqa49mib13.cloudfront.net/spark-0.9.0-incubating.tgz) (Feb 2, 2014) [(release notes)]({{site.url}}releases/spark-release-0-9-0.html) (prebuilt: [Hadoop1 [HDP1, CDH3]](http://d3kbcqa49mib13.cloudfront.net/spark-0.9.0-incubating-bin-hadoop1.tgz), [CDH4](http://d3kbcqa49mib13.cloudfront.net/spark-0.9.0-incubating-bin-cdh4.tgz), [Hadoop2 [HDP2, CDH5]](http://d3kbcqa49mib13.cloudfront.net/spark-0.9.0-incubating-bin-hadoop2.tgz))
 * [Spark 0.8.1](http://d3kbcqa49mib13.cloudfront.net/spark-0.8.1-incubating.tgz) (Dec 19, 2013) [(release notes)]({{site.url}}releases/spark-release-0-8-1.html) (prebuilt: [Hadoop1 [HDP1, CDH3]](http://d3kbcqa49mib13.cloudfront.net/spark-0.8.1-incubating-bin-hadoop1.tgz), [CDH4](http://d3kbcqa49mib13.cloudfront.net/spark-0.8.1-incubating-bin-cdh4.tgz), [Hadoop2 [HDP2, CDH5]](http://d3kbcqa49mib13.cloudfront.net/spark-0.8.1-incubating-bin-hadoop2.tgz))

Modified: spark/site/downloads.html
URL: 
http://svn.apache.org/viewvc/spark/site/downloads.html?rev=1608626&r1=1608625&r2=1608626&view=diff
==
--- spark/site/downloads.html (original)
+++ spark/site/downloads.html Mon Jul  7 22:47:19 2014
@@ -206,7 +206,7 @@ version: 1.0.0
 
 <h3 id="all-releases">All Releases</h3>
 <ul>
-  <li><a href="http://d3kbcqa49mib13.cloudfront.net/spark-1.0.0.tgz">Spark 1.0.0</a> (May 30, 2014) <a href="/releases/spark-release-1.0.0.html">(release notes)</a> (prebuilt: <a href="http://d3kbcqa49mib13.cloudfront.net/spark-1.0.0-bin-hadoop1.tgz">Hadoop1 [HDP1, CDH3]</a>, <a href="http://d3kbcqa49mib13.cloudfront.net/spark-1.0.0-bin-cdh4.tgz">CDH4</a>, <a href="http://d3kbcqa49mib13.cloudfront.net/spark-1.0.0-bin-hadoop2.tgz">Hadoop2 [HDP2, CDH5]</a>)</li>
+  <li><a href="http://d3kbcqa49mib13.cloudfront.net/spark-1.0.0.tgz">Spark 1.0.0</a> (May 30, 2014) <a href="/releases/spark-release-1-0-0.html">(release notes)</a> (prebuilt: <a href="http://d3kbcqa49mib13.cloudfront.net/spark-1.0.0-bin-hadoop1.tgz">Hadoop1 [HDP1, CDH3]</a>, <a href="http://d3kbcqa49mib13.cloudfront.net/spark-1.0.0-bin-cdh4.tgz">CDH4</a>, <a href="http://d3kbcqa49mib13.cloudfront.net/spark-1.0.0-bin-hadoop2.tgz">Hadoop2 [HDP2, CDH5]</a>)</li>
   <li><a href="http://d3kbcqa49mib13.cloudfront.net/spark-0.9.1.tgz">Spark 0.9.1</a> (Apr 9, 2014) <a href="/releases/spark-release-0-9-1.html">(release notes)</a> (prebuilt: <a href="http://d3kbcqa49mib13.cloudfront.net/spark-0.9.1-bin-hadoop1.tgz">Hadoop1 [HDP1, CDH3]</a>, <a href="http://d3kbcqa49mib13.cloudfront.net/spark-0.9.1-bin-cdh4.tgz">CDH4</a>, <a href="http://d3kbcqa49mib13.cloudfront.net/spark-0.9.1-bin-hadoop2.tgz">Hadoop2 [HDP2, CDH5]</a>)</li>
   <li><a href="http://d3kbcqa49mib13.cloudfront.net/spark-0.9.0-incubating.tgz">Spark 0.9.0</a> (Feb 2, 2014) <a href="/releases/spark-release-0-9-0.html">(release notes)</a> (prebuilt: <a href="http://d3kbcqa49mib13.cloudfront.net/spark-0.9.0-incubating-bin-hadoop1.tgz">Hadoop1 [HDP1, CDH3]</a>, <a

git commit: [SPARK-2339][SQL] SQL parser in sql-core is case sensitive, but a table alias is converted to lower case when we create Subquery

2014-07-07 Thread marmbrus
Repository: spark
Updated Branches:
  refs/heads/branch-1.0 5044ba60a -> e522971e8


[SPARK-2339][SQL] SQL parser in sql-core is case sensitive, but a table alias 
is converted to lower case when we create Subquery

Reported by 
http://apache-spark-user-list.1001560.n3.nabble.com/Spark-SQL-Join-throws-exception-td8599.html
After we get the table from the catalog, because the table has an alias, we 
will temporarily insert a Subquery. Then, we convert the table alias to lower 
case regardless of whether the parser is case sensitive.
To see the issue ...
```
val sqlContext = new org.apache.spark.sql.SQLContext(sc)
import sqlContext.createSchemaRDD

case class Person(name: String, age: Int)

val people = sc.textFile("examples/src/main/resources/people.txt")
  .map(_.split(",")).map(p => Person(p(0), p(1).trim.toInt))
people.registerAsTable("people")

sqlContext.sql("select PEOPLE.name from people PEOPLE")
```
The plan is ...
```
== Query Plan ==
Project ['PEOPLE.name]
 ExistingRdd [name#0,age#1], MapPartitionsRDD[4] at mapPartitions at basicOperators.scala:176
```
You can find that `PEOPLE.name` is not resolved: because the alias was 
lowercased to `people`, the qualifier `PEOPLE` no longer matches anything.

This PR introduces three changes.
1.  If a table has an alias, the catalog will not lowercase the alias. If a 
lowercase alias is needed, the analyzer will do the work.
2.  A catalog has a new val caseSensitive that indicates whether the catalog 
is case sensitive. For example, a SimpleCatalog is case sensitive, but a 
HiveMetastoreCatalog is not.
3.  Corresponding unit tests.
With this PR, case sensitivity of database names and table names is handled 
by the catalog. Case sensitivity of other identifiers is handled by the 
analyzer.
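
To make the division of labor concrete, here is a minimal sketch of the 
normalization rule described above; `SketchCatalog` and `normalizeNames` are 
illustrative names, not Spark's API (the real method, 
`processDatabaseAndTableName`, appears in the diff below):

```
// Toy trait, assuming only what the PR states: a case-insensitive catalog
// lowercases database and table names, a case-sensitive one leaves them
// untouched, and neither ever rewrites a table alias (the analyzer owns
// alias case sensitivity).
trait SketchCatalog {
  def caseSensitive: Boolean

  protected def normalizeNames(
      databaseName: Option[String],
      tableName: String): (Option[String], String) =
    if (caseSensitive) (databaseName, tableName)
    else (databaseName.map(_.toLowerCase), tableName.toLowerCase)
}
```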

JIRA: https://issues.apache.org/jira/browse/SPARK-2339

Author: Yin Huai h...@cse.ohio-state.edu

Closes #1317 from yhuai/SPARK-2339 and squashes the following commits:

12d8006 [Yin Huai] Handling case sensitivity correctly. This patch introduces 
three changes. 1. If a table has an alias, the catalog will not lowercase the 
alias. If a lowercase alias is needed, the analyzer will do the work. 2. A 
catalog has a new val caseSensitive that indicates if this catalog is case 
sensitive or not. For example, a SimpleCatalog is case sensitive, but a 
HiveMetastoreCatalog is not. 3. 
Corresponding unit tests. With this patch, case sensitivity of database names 
and table names is handled by the catalog. Case sensitivity of other 
identifiers is handled by the analyzer.

(cherry picked from commit c0b4cf097de50eb2c4b0f0e67da53ee92efc1f77)
Signed-off-by: Michael Armbrust mich...@databricks.com


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e522971e
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e522971e
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e522971e

Branch: refs/heads/branch-1.0
Commit: e522971e81efd3a7ec4a39b20082b890d11caa42
Parents: 5044ba6
Author: Yin Huai h...@cse.ohio-state.edu
Authored: Mon Jul 7 17:01:44 2014 -0700
Committer: Michael Armbrust mich...@databricks.com
Committed: Mon Jul 7 17:01:59 2014 -0700

--
 .../spark/sql/catalyst/analysis/Catalog.scala   | 55 
 .../sql/catalyst/analysis/AnalysisSuite.scala   | 69 +---
 .../scala/org/apache/spark/sql/SQLContext.scala |  2 +-
 .../spark/sql/hive/HiveMetastoreCatalog.scala   | 23 ---
 ...ive table-0-5d14d21a239daa42b086cc895215009a | 14 
 .../sql/hive/execution/HiveQuerySuite.scala | 16 +
 6 files changed, 149 insertions(+), 30 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/e522971e/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Catalog.scala
--
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Catalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Catalog.scala
index f30b5d8..0d05d98 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Catalog.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Catalog.scala
@@ -25,6 +25,9 @@ import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Subquery}
  * An interface for looking up relations by name.  Used by an [[Analyzer]].
  */
 trait Catalog {
+
+  def caseSensitive: Boolean
+
   def lookupRelation(
     databaseName: Option[String],
     tableName: String,
@@ -35,22 +38,44 @@ trait Catalog {
   def unregisterTable(databaseName: Option[String], tableName: String): Unit
 
   def unregisterAllTables(): Unit
+
+  protected def processDatabaseAndTableName(
+      databaseName: Option[String],
+      tableName: String): (Option[String], String) = {
+    if (!caseSensitive) {
+      (databaseName.map(_.toLowerCase), tableName.toLowerCase)
+    } else {
+      (databaseName, tableName)
+    }
+  }
+
+  protected def 

git commit: [SPARK-2339][SQL] SQL parser in sql-core is case sensitive, but a table alias is converted to lower case when we create Subquery

2014-07-07 Thread marmbrus
Repository: spark
Updated Branches:
  refs/heads/master f7ce1b3b4 -> c0b4cf097


[SPARK-2339][SQL] SQL parser in sql-core is case sensitive, but a table alias 
is converted to lower case when we create Subquery

Reported by 
http://apache-spark-user-list.1001560.n3.nabble.com/Spark-SQL-Join-throws-exception-td8599.html
After we get the table from the catalog, because the table has an alias, we 
will temporarily insert a Subquery. Then, we convert the table alias to lower 
case regardless of whether the parser is case sensitive.
To see the issue ...
```
val sqlContext = new org.apache.spark.sql.SQLContext(sc)
import sqlContext.createSchemaRDD

case class Person(name: String, age: Int)

val people = sc.textFile("examples/src/main/resources/people.txt")
  .map(_.split(",")).map(p => Person(p(0), p(1).trim.toInt))
people.registerAsTable("people")

sqlContext.sql("select PEOPLE.name from people PEOPLE")
```
The plan is ...
```
== Query Plan ==
Project ['PEOPLE.name]
 ExistingRdd [name#0,age#1], MapPartitionsRDD[4] at mapPartitions at basicOperators.scala:176
```
You can find that `PEOPLE.name` is not resolved: because the alias was 
lowercased to `people`, the qualifier `PEOPLE` no longer matches anything.

This PR introduces three changes.
1.  If a table has an alias, the catalog will not lowercase the alias. If a 
lowercase alias is needed, the analyzer will do the work.
2.  A catalog has a new val caseSensitive that indicates whether the catalog 
is case sensitive. For example, a SimpleCatalog is case sensitive, but a 
HiveMetastoreCatalog is not.
3.  Corresponding unit tests.
With this PR, case sensitivity of database names and table names is handled 
by the catalog. Case sensitivity of other identifiers is handled by the 
analyzer.

JIRA: https://issues.apache.org/jira/browse/SPARK-2339

Author: Yin Huai h...@cse.ohio-state.edu

Closes #1317 from yhuai/SPARK-2339 and squashes the following commits:

12d8006 [Yin Huai] Handling case sensitivity correctly. This patch introduces 
three changes. 1. If a table has an alias, the catalog will not lowercase the 
alias. If a lowercase alias is needed, the analyzer will do the work. 2. A 
catalog has a new val caseSensitive that indicates if this catalog is case 
sensitive or not. For example, a SimpleCatalog is case sensitive, but a 
HiveMetastoreCatalog is not. 3. 
Corresponding unit tests. With this patch, case sensitivity of database names 
and table names is handled by the catalog. Case sensitivity of other 
identifiers is handled by the analyzer.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c0b4cf09
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c0b4cf09
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c0b4cf09

Branch: refs/heads/master
Commit: c0b4cf097de50eb2c4b0f0e67da53ee92efc1f77
Parents: f7ce1b3
Author: Yin Huai h...@cse.ohio-state.edu
Authored: Mon Jul 7 17:01:44 2014 -0700
Committer: Michael Armbrust mich...@databricks.com
Committed: Mon Jul 7 17:01:44 2014 -0700

--
 .../spark/sql/catalyst/analysis/Catalog.scala   | 55 
 .../sql/catalyst/analysis/AnalysisSuite.scala   | 69 +---
 .../scala/org/apache/spark/sql/SQLContext.scala |  2 +-
 .../spark/sql/hive/HiveMetastoreCatalog.scala   | 23 ---
 ...ive table-0-5d14d21a239daa42b086cc895215009a | 14 
 .../sql/hive/execution/HiveQuerySuite.scala | 16 +
 6 files changed, 149 insertions(+), 30 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/c0b4cf09/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Catalog.scala
--
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Catalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Catalog.scala
index f30b5d8..0d05d98 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Catalog.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Catalog.scala
@@ -25,6 +25,9 @@ import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Subquery}
  * An interface for looking up relations by name.  Used by an [[Analyzer]].
  */
 trait Catalog {
+
+  def caseSensitive: Boolean
+
   def lookupRelation(
     databaseName: Option[String],
     tableName: String,
@@ -35,22 +38,44 @@ trait Catalog {
   def unregisterTable(databaseName: Option[String], tableName: String): Unit
 
   def unregisterAllTables(): Unit
+
+  protected def processDatabaseAndTableName(
+      databaseName: Option[String],
+      tableName: String): (Option[String], String) = {
+    if (!caseSensitive) {
+      (databaseName.map(_.toLowerCase), tableName.toLowerCase)
+    } else {
+      (databaseName, tableName)
+    }
+  }
+
+  protected def processDatabaseAndTableName(
+      databaseName: String,
+      tableName: String): (String, String) = {
+    if (!caseSensitive) {
+      

git commit: [SPARK-2375][SQL] JSON schema inference may not resolve type conflicts correctly for a field inside an array of structs

2014-07-07 Thread marmbrus
Repository: spark
Updated Branches:
  refs/heads/branch-1.0 691b554f3 -> 1032c2875


[SPARK-2375][SQL] JSON schema inference may not resolve type conflicts 
correctly for a field inside an array of structs

For example, for
```
{"array": [{"field":214748364700}, {"field":1}]}
```
the type of `field` is resolved as IntType, whereas for
```
{"array": [{"field":1}, {"field":214748364700}]}
```
it is resolved as LongType.
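
The invariant the fix establishes can be sketched as a commutative type 
merge; `IntType` and `LongType` below are simplified stand-ins, not 
Catalyst's actual `IntegerType`/`LongType` classes:

```
// Simplified stand-ins for the two numeric types in the example.
sealed trait JsonFieldType
case object IntType extends JsonFieldType
case object LongType extends JsonFieldType

// Commutative merge: a 64-bit value in any element widens the field to
// LongType regardless of scan order, which is exactly the property the
// order-dependent resolution above violated.
def mergeTypes(a: JsonFieldType, b: JsonFieldType): JsonFieldType =
  (a, b) match {
    case (IntType, IntType) => IntType
    case _                  => LongType
  }

assert(mergeTypes(IntType, LongType) == mergeTypes(LongType, IntType))
```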

JIRA: https://issues.apache.org/jira/browse/SPARK-2375

Author: Yin Huai huaiyin@gmail.com

Closes #1308 from yhuai/SPARK-2375 and squashes the following commits:

3e2e312 [Yin Huai] Update unit test.
1b2ff9f [Yin Huai] Merge remote-tracking branch 'upstream/master' into 
SPARK-2375
10794eb [Yin Huai] Correctly resolve the type of a field inside an array of 
structs.

(cherry picked from commit f0496ee10847db921a028a34f70385f9b740b3f3)
Signed-off-by: Michael Armbrust mich...@databricks.com


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1032c287
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1032c287
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1032c287

Branch: refs/heads/branch-1.0
Commit: 1032c28750ee2305756b44817e99951fe6385a63
Parents: 691b554
Author: Yin Huai huaiyin@gmail.com
Authored: Mon Jul 7 17:05:59 2014 -0700
Committer: Michael Armbrust mich...@databricks.com
Committed: Mon Jul 7 17:06:10 2014 -0700

--
 .../src/main/scala/org/apache/spark/sql/json/JsonRDD.scala  | 9 +
 .../test/scala/org/apache/spark/sql/json/JsonSuite.scala| 8 +---
 .../test/scala/org/apache/spark/sql/json/TestJsonData.scala | 3 ++-
 3 files changed, 12 insertions(+), 8 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/1032c287/sql/core/src/main/scala/org/apache/spark/sql/json/JsonRDD.scala
--
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/json/JsonRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/json/JsonRDD.scala
index edf8677..f6cbca9 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/json/JsonRDD.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/json/JsonRDD.scala
@@ -198,11 +198,12 @@ private[sql] object JsonRDD extends Logging {
    * in this JSON object can appear in other JSON objects.
    */
   private def allKeysWithValueTypes(m: Map[String, Any]): Set[(String, DataType)] = {
-    m.map{
+    val keyValuePairs = m.map {
       // Quote the key with backticks to handle cases which have dots
       // in the field name.
-      case (key, dataType) => (s"`$key`", dataType)
-    }.flatMap {
+      case (key, value) => (s"`$key`", value)
+    }.toSet
+    keyValuePairs.flatMap {
       case (key: String, struct: Map[String, Any]) => {
         // The value associated with the key is a JSON object.
         allKeysWithValueTypes(struct).map {
@@ -224,7 +225,7 @@ private[sql] object JsonRDD extends Logging {
         }
       }
       case (key: String, value) => (key, typeOfPrimitiveValue(value)) :: Nil
-    }.toSet
+    }
   }
 
   /**

http://git-wip-us.apache.org/repos/asf/spark/blob/1032c287/sql/core/src/test/scala/org/apache/spark/sql/json/JsonSuite.scala
--
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/json/JsonSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/json/JsonSuite.scala
index 10bd9f0..e765cfc 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/json/JsonSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/json/JsonSuite.scala
@@ -451,7 +451,9 @@ class JsonSuite extends QueryTest {
     val jsonSchemaRDD = jsonRDD(arrayElementTypeConflict)
 
     val expectedSchema =
-      AttributeReference("array", ArrayType(StringType), true)() :: Nil
+      AttributeReference("array1", ArrayType(StringType), true)() ::
+      AttributeReference("array2", ArrayType(StructType(
+        StructField("field", LongType, true) :: Nil)), true)() :: Nil
 
     comparePlans(Schema(expectedSchema), Schema(jsonSchemaRDD.logicalPlan.output))
 
@@ -460,12 +462,12 @@ class JsonSuite extends QueryTest {
     checkAnswer(
       sql("select * from jsonTable"),
       Seq(Seq("1", "1.1", "true", null, "[]", "{}", "[2,3,4]",
-        """{"field":"str"}""")) :: Nil
+        """{"field":"str"}"""), Seq(Seq(214748364700L), Seq(1))) :: Nil
     )
 
     // Treat an element as a number.
     checkAnswer(
-      sql("select array[0] + 1 from jsonTable"),
+      sql("select array1[0] + 1 from jsonTable"),
       2
     )
   }

http://git-wip-us.apache.org/repos/asf/spark/blob/1032c287/sql/core/src/test/scala/org/apache/spark/sql/json/TestJsonData.scala
--
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/json/TestJsonData.scala 

git commit: [SPARK-2376][SQL] Selecting list values inside nested JSON objects raises java.lang.IllegalArgumentException

2014-07-07 Thread marmbrus
Repository: spark
Updated Branches:
  refs/heads/master f0496ee10 -> 4352a2fda


[SPARK-2376][SQL] Selecting list values inside nested JSON objects raises 
java.lang.IllegalArgumentException

JIRA: https://issues.apache.org/jira/browse/SPARK-2376

Author: Yin Huai h...@cse.ohio-state.edu

Closes #1320 from yhuai/SPARK-2376 and squashes the following commits:

0107417 [Yin Huai] Merge remote-tracking branch 'upstream/master' into 
SPARK-2376
480803d [Yin Huai] Correctly handling JSON arrays in PySpark.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4352a2fd
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4352a2fd
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4352a2fd

Branch: refs/heads/master
Commit: 4352a2fdaa64efee7158eabef65703460ff284ec
Parents: f0496ee
Author: Yin Huai h...@cse.ohio-state.edu
Authored: Mon Jul 7 18:37:38 2014 -0700
Committer: Michael Armbrust mich...@databricks.com
Committed: Mon Jul 7 18:37:38 2014 -0700

--
 python/pyspark/sql.py   | 24 ++-
 .../scala/org/apache/spark/sql/SchemaRDD.scala  | 45 +---
 2 files changed, 44 insertions(+), 25 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/4352a2fd/python/pyspark/sql.py
--
diff --git a/python/pyspark/sql.py b/python/pyspark/sql.py
index 5051c82..ffe1775 100644
--- a/python/pyspark/sql.py
+++ b/python/pyspark/sql.py
@@ -152,10 +152,12 @@ class SQLContext:
         >>> ofn.close()
         >>> srdd = sqlCtx.jsonFile(jsonFile)
         >>> sqlCtx.registerRDDAsTable(srdd, "table1")
-        >>> srdd2 = sqlCtx.sql("SELECT field1 AS f1, field2 as f2, field3 as f3 from table1")
-        >>> srdd2.collect() == [{"f1": 1, "f2": "row1", "f3":{"field4":11}},
-        ... {"f1": 2, "f2": "row2", "f3":{"field4":22}},
-        ... {"f1": 3, "f2": "row3", "f3":{"field4":33}}]
+        >>> srdd2 = sqlCtx.sql(
+        ...   "SELECT field1 AS f1, field2 as f2, field3 as f3, field6 as f4 from table1")
+        >>> srdd2.collect() == [
+        ... {"f1":1, "f2":"row1", "f3":{"field4":11, "field5": None}, "f4":None},
+        ... {"f1":2, "f2":None, "f3":{"field4":22,  "field5": [10, 11]}, "f4":[{"field7": "row2"}]},
+        ... {"f1":None, "f2":"row3", "f3":{"field4":33, "field5": []}, "f4":None}]
         True
         """
         jschema_rdd = self._ssql_ctx.jsonFile(path)
@@ -167,10 +169,12 @@ class SQLContext:
 
         >>> srdd = sqlCtx.jsonRDD(json)
         >>> sqlCtx.registerRDDAsTable(srdd, "table1")
-        >>> srdd2 = sqlCtx.sql("SELECT field1 AS f1, field2 as f2, field3 as f3 from table1")
-        >>> srdd2.collect() == [{"f1": 1, "f2": "row1", "f3":{"field4":11}},
-        ... {"f1": 2, "f2": "row2", "f3":{"field4":22}},
-        ... {"f1": 3, "f2": "row3", "f3":{"field4":33}}]
+        >>> srdd2 = sqlCtx.sql(
+        ...   "SELECT field1 AS f1, field2 as f2, field3 as f3, field6 as f4 from table1")
+        >>> srdd2.collect() == [
+        ... {"f1":1, "f2":"row1", "f3":{"field4":11, "field5": None}, "f4":None},
+        ... {"f1":2, "f2":None, "f3":{"field4":22,  "field5": [10, 11]}, "f4":[{"field7": "row2"}]},
+        ... {"f1":None, "f2":"row3", "f3":{"field4":33, "field5": []}, "f4":None}]
         True
         """
         def func(split, iterator):
@@ -492,8 +496,8 @@ def _test():
     globs['rdd'] = sc.parallelize([{"field1" : 1, "field2" : "row1"},
         {"field1" : 2, "field2": "row2"}, {"field1" : 3, "field2": "row3"}])
     jsonStrings = ['{"field1": 1, "field2": "row1", "field3":{"field4":11}}',
-       '{"field1" : 2, "field2": "row2", "field3":{"field4":22}}',
-       '{"field1" : 3, "field2": "row3", "field3":{"field4":33}}']
+       '{"field1" : 2, "field3":{"field4":22, "field5": [10, 11]}, "field6":[{"field7": "row2"}]}',
+       '{"field1" : null, "field2": "row3", "field3":{"field4":33, "field5": []}}']
     globs['jsonStrings'] = jsonStrings
     globs['json'] = sc.parallelize(jsonStrings)
     globs['nestedRdd1'] = sc.parallelize([

http://git-wip-us.apache.org/repos/asf/spark/blob/4352a2fd/sql/core/src/main/scala/org/apache/spark/sql/SchemaRDD.scala
--
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SchemaRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/SchemaRDD.scala
index 8f9f54f..8bcfc7c 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SchemaRDD.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SchemaRDD.scala
@@ -17,6 +17,11 @@
 
 package org.apache.spark.sql
 
+import java.util.{Map => JMap, List => JList, Set => JSet}
+
+import scala.collection.JavaConversions._
+import scala.collection.JavaConverters._
+
 import net.razorvine.pickle.Pickler
 
 import org.apache.spark.{Dependency, OneToOneDependency, Partition, Partitioner, TaskContext}
@@ -27,10 +32,9 @@ import org.apache.spark.sql.catalyst.analysis._
 import 

git commit: [SPARK-2376][SQL] Selecting list values inside nested JSON objects raises java.lang.IllegalArgumentException

2014-07-07 Thread marmbrus
Repository: spark
Updated Branches:
  refs/heads/branch-1.0 1032c2875 -> 9dce7beff


[SPARK-2376][SQL] Selecting list values inside nested JSON objects raises 
java.lang.IllegalArgumentException

JIRA: https://issues.apache.org/jira/browse/SPARK-2376

Author: Yin Huai h...@cse.ohio-state.edu

Closes #1320 from yhuai/SPARK-2376 and squashes the following commits:

0107417 [Yin Huai] Merge remote-tracking branch 'upstream/master' into 
SPARK-2376
480803d [Yin Huai] Correctly handling JSON arrays in PySpark.

(cherry picked from commit 4352a2fdaa64efee7158eabef65703460ff284ec)
Signed-off-by: Michael Armbrust mich...@databricks.com


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/9dce7bef
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/9dce7bef
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/9dce7bef

Branch: refs/heads/branch-1.0
Commit: 9dce7beffb668e42ee05d961ae47f33047d579cc
Parents: 1032c28
Author: Yin Huai h...@cse.ohio-state.edu
Authored: Mon Jul 7 18:37:38 2014 -0700
Committer: Michael Armbrust mich...@databricks.com
Committed: Mon Jul 7 18:52:51 2014 -0700

--
 python/pyspark/sql.py   | 24 ++-
 .../scala/org/apache/spark/sql/SchemaRDD.scala  | 45 +---
 2 files changed, 44 insertions(+), 25 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/9dce7bef/python/pyspark/sql.py
--
diff --git a/python/pyspark/sql.py b/python/pyspark/sql.py
index 5051c82..ffe1775 100644
--- a/python/pyspark/sql.py
+++ b/python/pyspark/sql.py
@@ -152,10 +152,12 @@ class SQLContext:
         >>> ofn.close()
         >>> srdd = sqlCtx.jsonFile(jsonFile)
         >>> sqlCtx.registerRDDAsTable(srdd, "table1")
-        >>> srdd2 = sqlCtx.sql("SELECT field1 AS f1, field2 as f2, field3 as f3 from table1")
-        >>> srdd2.collect() == [{"f1": 1, "f2": "row1", "f3":{"field4":11}},
-        ... {"f1": 2, "f2": "row2", "f3":{"field4":22}},
-        ... {"f1": 3, "f2": "row3", "f3":{"field4":33}}]
+        >>> srdd2 = sqlCtx.sql(
+        ...   "SELECT field1 AS f1, field2 as f2, field3 as f3, field6 as f4 from table1")
+        >>> srdd2.collect() == [
+        ... {"f1":1, "f2":"row1", "f3":{"field4":11, "field5": None}, "f4":None},
+        ... {"f1":2, "f2":None, "f3":{"field4":22,  "field5": [10, 11]}, "f4":[{"field7": "row2"}]},
+        ... {"f1":None, "f2":"row3", "f3":{"field4":33, "field5": []}, "f4":None}]
         True
         """
         jschema_rdd = self._ssql_ctx.jsonFile(path)
@@ -167,10 +169,12 @@ class SQLContext:
 
         >>> srdd = sqlCtx.jsonRDD(json)
         >>> sqlCtx.registerRDDAsTable(srdd, "table1")
-        >>> srdd2 = sqlCtx.sql("SELECT field1 AS f1, field2 as f2, field3 as f3 from table1")
-        >>> srdd2.collect() == [{"f1": 1, "f2": "row1", "f3":{"field4":11}},
-        ... {"f1": 2, "f2": "row2", "f3":{"field4":22}},
-        ... {"f1": 3, "f2": "row3", "f3":{"field4":33}}]
+        >>> srdd2 = sqlCtx.sql(
+        ...   "SELECT field1 AS f1, field2 as f2, field3 as f3, field6 as f4 from table1")
+        >>> srdd2.collect() == [
+        ... {"f1":1, "f2":"row1", "f3":{"field4":11, "field5": None}, "f4":None},
+        ... {"f1":2, "f2":None, "f3":{"field4":22,  "field5": [10, 11]}, "f4":[{"field7": "row2"}]},
+        ... {"f1":None, "f2":"row3", "f3":{"field4":33, "field5": []}, "f4":None}]
         True
         """
         def func(split, iterator):
@@ -492,8 +496,8 @@ def _test():
     globs['rdd'] = sc.parallelize([{"field1" : 1, "field2" : "row1"},
         {"field1" : 2, "field2": "row2"}, {"field1" : 3, "field2": "row3"}])
     jsonStrings = ['{"field1": 1, "field2": "row1", "field3":{"field4":11}}',
-       '{"field1" : 2, "field2": "row2", "field3":{"field4":22}}',
-       '{"field1" : 3, "field2": "row3", "field3":{"field4":33}}']
+       '{"field1" : 2, "field3":{"field4":22, "field5": [10, 11]}, "field6":[{"field7": "row2"}]}',
+       '{"field1" : null, "field2": "row3", "field3":{"field4":33, "field5": []}}']
     globs['jsonStrings'] = jsonStrings
     globs['json'] = sc.parallelize(jsonStrings)
     globs['nestedRdd1'] = sc.parallelize([

http://git-wip-us.apache.org/repos/asf/spark/blob/9dce7bef/sql/core/src/main/scala/org/apache/spark/sql/SchemaRDD.scala
--
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SchemaRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/SchemaRDD.scala
index 8f9f54f..8bcfc7c 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/SchemaRDD.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/SchemaRDD.scala
@@ -17,6 +17,11 @@
 
 package org.apache.spark.sql
 
+import java.util.{Map => JMap, List => JList, Set => JSet}
+
+import scala.collection.JavaConversions._
+import scala.collection.JavaConverters._
+
 import net.razorvine.pickle.Pickler
 
 import org.apache.spark.{Dependency,