spark git commit: [MINOR] Fix a minor bug in PageRank example.
Repository: spark
Updated Branches:
  refs/heads/branch-1.2 d5763c3b9 -> c0a0eaacc

[MINOR] Fix a minor bug in PageRank example.

Fix the bug where passing only one argument causes an array-out-of-bounds exception in the PageRank example.

Author: Li Yao <hnkfli...@gmail.com>

Closes #6455 from lastland/patch-1 and squashes the following commits:

de06128 [Li Yao] Fix the bug that entering only 1 arg will cause array out of bounds exception.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c0a0eaac
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c0a0eaac
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c0a0eaac

Branch: refs/heads/branch-1.2
Commit: c0a0eaacc10a2d0bf2badf551e9068738b98b1d9
Parents: d5763c3
Author: Li Yao <hnkfli...@gmail.com>
Authored: Thu May 28 13:39:39 2015 -0700
Committer: Andrew Or <and...@databricks.com>
Committed: Thu May 28 13:42:32 2015 -0700

----------------------------------------------------------------------
 .../src/main/scala/org/apache/spark/examples/SparkPageRank.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/c0a0eaac/examples/src/main/scala/org/apache/spark/examples/SparkPageRank.scala
----------------------------------------------------------------------
diff --git a/examples/src/main/scala/org/apache/spark/examples/SparkPageRank.scala b/examples/src/main/scala/org/apache/spark/examples/SparkPageRank.scala
index 8d092b6..bd7894f 100644
--- a/examples/src/main/scala/org/apache/spark/examples/SparkPageRank.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/SparkPageRank.scala
@@ -51,7 +51,7 @@ object SparkPageRank {
     showWarning()
 
     val sparkConf = new SparkConf().setAppName("PageRank")
-    val iters = if (args.length > 0) args(1).toInt else 10
+    val iters = if (args.length > 1) args(1).toInt else 10
     val ctx = new SparkContext(sparkConf)
     val lines = ctx.textFile(args(0), 1)
     val links = lines.map{ s =>
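For context on the fix above: args(1) is the second argument, so guarding it with args.length > 0 still throws an ArrayIndexOutOfBoundsException when exactly one argument is passed; the guard has to be args.length > 1. A minimal standalone sketch of the corrected check (plain Scala, outside Spark; ArgsDemo is an illustrative name, not part of the example code):

object ArgsDemo {
  // args(1) exists only when at least two arguments were passed, so the
  // guard must be args.length > 1, not args.length > 0.
  def iterations(args: Array[String]): Int =
    if (args.length > 1) args(1).toInt else 10

  def main(args: Array[String]): Unit = {
    println(iterations(Array("links.txt")))        // falls back to the default, 10
    println(iterations(Array("links.txt", "25")))  // 25
  }
}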
spark git commit: [DOCS] Fixing broken IDE setup link in the Building Spark documentation.
Repository: spark
Updated Branches:
  refs/heads/master c771589c9 -> 3e312a5ed

[DOCS] Fixing broken IDE setup link in the Building Spark documentation.

The location of the IDE setup information has changed, so this just updates the link on the Building Spark page.

Author: Mike Dusenberry <dusenberr...@gmail.com>

Closes #6467 from dusenberrymw/Fix_Broken_Link_On_Building_Spark_Doc and squashes the following commits:

75c533a [Mike Dusenberry] Fixing broken IDE setup link in the Building Spark documentation by pointing to new location.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3e312a5e
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3e312a5e
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3e312a5e

Branch: refs/heads/master
Commit: 3e312a5ed0154527c66e0d2cc3bfce0a820e
Parents: c771589
Author: Mike Dusenberry <dusenberr...@gmail.com>
Authored: Thu May 28 17:15:10 2015 -0400
Committer: Sean Owen <so...@cloudera.com>
Committed: Thu May 28 17:15:10 2015 -0400

----------------------------------------------------------------------
 docs/building-spark.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/3e312a5e/docs/building-spark.md
----------------------------------------------------------------------
diff --git a/docs/building-spark.md b/docs/building-spark.md
index 3ca7f27..b2649d1 100644
--- a/docs/building-spark.md
+++ b/docs/building-spark.md
@@ -176,7 +176,7 @@ Thus, the full flow for running continuous-compilation of the `core` submodule m
 # Building Spark with IntelliJ IDEA or Eclipse
 
 For help in setting up IntelliJ IDEA or Eclipse for Spark development, and troubleshooting, refer to the
-[wiki page for IDE setup](https://cwiki.apache.org/confluence/display/SPARK/Contributing+to+Spark#ContributingtoSpark-IDESetup).
+[wiki page for IDE setup](https://cwiki.apache.org/confluence/display/SPARK/Useful+Developer+Tools#UsefulDeveloperTools-IDESetup).
 
 # Running Java 8 Test Suites
spark git commit: [MINOR] Fix a minor bug in PageRank example.
Repository: spark
Updated Branches:
  refs/heads/branch-1.3 33e1539b3 -> 7a3feb552

[MINOR] Fix a minor bug in PageRank example.

Fix the bug where passing only one argument causes an array-out-of-bounds exception in the PageRank example.

Author: Li Yao <hnkfli...@gmail.com>

Closes #6455 from lastland/patch-1 and squashes the following commits:

de06128 [Li Yao] Fix the bug that entering only 1 arg will cause array out of bounds exception.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7a3feb55
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7a3feb55
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7a3feb55

Branch: refs/heads/branch-1.3
Commit: 7a3feb55296bfdd522d13455647c73926d215934
Parents: 33e1539
Author: Li Yao <hnkfli...@gmail.com>
Authored: Thu May 28 13:39:39 2015 -0700
Committer: Andrew Or <and...@databricks.com>
Committed: Thu May 28 13:41:59 2015 -0700

----------------------------------------------------------------------
 .../src/main/scala/org/apache/spark/examples/SparkPageRank.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/7a3feb55/examples/src/main/scala/org/apache/spark/examples/SparkPageRank.scala
----------------------------------------------------------------------
diff --git a/examples/src/main/scala/org/apache/spark/examples/SparkPageRank.scala b/examples/src/main/scala/org/apache/spark/examples/SparkPageRank.scala
index 8d092b6..bd7894f 100644
--- a/examples/src/main/scala/org/apache/spark/examples/SparkPageRank.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/SparkPageRank.scala
@@ -51,7 +51,7 @@ object SparkPageRank {
     showWarning()
 
     val sparkConf = new SparkConf().setAppName("PageRank")
-    val iters = if (args.length > 0) args(1).toInt else 10
+    val iters = if (args.length > 1) args(1).toInt else 10
     val ctx = new SparkContext(sparkConf)
     val lines = ctx.textFile(args(0), 1)
    val links = lines.map{ s =>
spark git commit: [DOCS] Fixing broken IDE setup link in the Building Spark documentation.
Repository: spark
Updated Branches:
  refs/heads/branch-1.4 448528398 -> 0a65224ae

[DOCS] Fixing broken IDE setup link in the Building Spark documentation.

The location of the IDE setup information has changed, so this just updates the link on the Building Spark page.

Author: Mike Dusenberry <dusenberr...@gmail.com>

Closes #6467 from dusenberrymw/Fix_Broken_Link_On_Building_Spark_Doc and squashes the following commits:

75c533a [Mike Dusenberry] Fixing broken IDE setup link in the Building Spark documentation by pointing to new location.

(cherry picked from commit 3e312a5ed0154527c66e0d2cc3bfce0a820e)
Signed-off-by: Sean Owen <so...@cloudera.com>

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0a65224a
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0a65224a
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0a65224a

Branch: refs/heads/branch-1.4
Commit: 0a65224aed9d2bb780e0d3e70d2a7ba34f30219b
Parents: 4485283
Author: Mike Dusenberry <dusenberr...@gmail.com>
Authored: Thu May 28 17:15:10 2015 -0400
Committer: Sean Owen <so...@cloudera.com>
Committed: Thu May 28 17:16:42 2015 -0400

----------------------------------------------------------------------
 docs/building-spark.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/0a65224a/docs/building-spark.md
----------------------------------------------------------------------
diff --git a/docs/building-spark.md b/docs/building-spark.md
index 3ca7f27..b2649d1 100644
--- a/docs/building-spark.md
+++ b/docs/building-spark.md
@@ -176,7 +176,7 @@ Thus, the full flow for running continuous-compilation of the `core` submodule m
 # Building Spark with IntelliJ IDEA or Eclipse
 
 For help in setting up IntelliJ IDEA or Eclipse for Spark development, and troubleshooting, refer to the
-[wiki page for IDE setup](https://cwiki.apache.org/confluence/display/SPARK/Contributing+to+Spark#ContributingtoSpark-IDESetup).
+[wiki page for IDE setup](https://cwiki.apache.org/confluence/display/SPARK/Useful+Developer+Tools#UsefulDeveloperTools-IDESetup).
 
 # Running Java 8 Test Suites
spark git commit: [DOCS] Fixing broken IDE setup link in the Building Spark documentation.
Repository: spark
Updated Branches:
  refs/heads/branch-1.3 7a3feb552 -> d09a053ec

[DOCS] Fixing broken IDE setup link in the Building Spark documentation.

The location of the IDE setup information has changed, so this just updates the link on the Building Spark page.

Author: Mike Dusenberry <dusenberr...@gmail.com>

Closes #6467 from dusenberrymw/Fix_Broken_Link_On_Building_Spark_Doc and squashes the following commits:

75c533a [Mike Dusenberry] Fixing broken IDE setup link in the Building Spark documentation by pointing to new location.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d09a053e
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d09a053e
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d09a053e

Branch: refs/heads/branch-1.3
Commit: d09a053ecc6b13ce1f52f0c50a7abb198ae2762f
Parents: 7a3feb5
Author: Mike Dusenberry <dusenberr...@gmail.com>
Authored: Thu May 28 17:15:10 2015 -0400
Committer: Sean Owen <so...@cloudera.com>
Committed: Thu May 28 18:18:44 2015 -0400

----------------------------------------------------------------------
 docs/building-spark.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/d09a053e/docs/building-spark.md
----------------------------------------------------------------------
diff --git a/docs/building-spark.md b/docs/building-spark.md
index d12ee8b..fd378cf 100644
--- a/docs/building-spark.md
+++ b/docs/building-spark.md
@@ -161,7 +161,7 @@ Thus, the full flow for running continuous-compilation of the `core` submodule m
 # Building Spark with IntelliJ IDEA or Eclipse
 
 For help in setting up IntelliJ IDEA or Eclipse for Spark development, and troubleshooting, refer to the
-[wiki page for IDE setup](https://cwiki.apache.org/confluence/display/SPARK/Contributing+to+Spark#ContributingtoSpark-IDESetup).
+[wiki page for IDE setup](https://cwiki.apache.org/confluence/display/SPARK/Useful+Developer+Tools#UsefulDeveloperTools-IDESetup).
 
 # Building Spark Debian Packages
spark git commit: [MINOR] Fix a minor bug in PageRank example.
Repository: spark
Updated Branches:
  refs/heads/branch-1.4 7b5dffb80 -> 448528398

[MINOR] Fix a minor bug in PageRank example.

Fix the bug where passing only one argument causes an array-out-of-bounds exception in the PageRank example.

Author: Li Yao <hnkfli...@gmail.com>

Closes #6455 from lastland/patch-1 and squashes the following commits:

de06128 [Li Yao] Fix the bug that entering only 1 arg will cause array out of bounds exception.

(cherry picked from commit c771589c96403b2a518fb77d5162eca8f495f37b)
Signed-off-by: Andrew Or <and...@databricks.com>

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/44852839
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/44852839
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/44852839

Branch: refs/heads/branch-1.4
Commit: 4485283981e4592dd817fc8956b4a6faea06d817
Parents: 7b5dffb
Author: Li Yao <hnkfli...@gmail.com>
Authored: Thu May 28 13:39:39 2015 -0700
Committer: Andrew Or <and...@databricks.com>
Committed: Thu May 28 13:39:49 2015 -0700

----------------------------------------------------------------------
 .../src/main/scala/org/apache/spark/examples/SparkPageRank.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/44852839/examples/src/main/scala/org/apache/spark/examples/SparkPageRank.scala
----------------------------------------------------------------------
diff --git a/examples/src/main/scala/org/apache/spark/examples/SparkPageRank.scala b/examples/src/main/scala/org/apache/spark/examples/SparkPageRank.scala
index 8d092b6..bd7894f 100644
--- a/examples/src/main/scala/org/apache/spark/examples/SparkPageRank.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/SparkPageRank.scala
@@ -51,7 +51,7 @@ object SparkPageRank {
     showWarning()
 
     val sparkConf = new SparkConf().setAppName("PageRank")
-    val iters = if (args.length > 0) args(1).toInt else 10
+    val iters = if (args.length > 1) args(1).toInt else 10
     val ctx = new SparkContext(sparkConf)
     val lines = ctx.textFile(args(0), 1)
     val links = lines.map{ s =>
spark git commit: [MINOR] Fix a minor bug in PageRank example.
Repository: spark
Updated Branches:
  refs/heads/master 530efe3e8 -> c771589c9

[MINOR] Fix a minor bug in PageRank example.

Fix the bug where passing only one argument causes an array-out-of-bounds exception in the PageRank example.

Author: Li Yao <hnkfli...@gmail.com>

Closes #6455 from lastland/patch-1 and squashes the following commits:

de06128 [Li Yao] Fix the bug that entering only 1 arg will cause array out of bounds exception.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c771589c
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c771589c
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c771589c

Branch: refs/heads/master
Commit: c771589c96403b2a518fb77d5162eca8f495f37b
Parents: 530efe3
Author: Li Yao <hnkfli...@gmail.com>
Authored: Thu May 28 13:39:39 2015 -0700
Committer: Andrew Or <and...@databricks.com>
Committed: Thu May 28 13:39:39 2015 -0700

----------------------------------------------------------------------
 .../src/main/scala/org/apache/spark/examples/SparkPageRank.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/c771589c/examples/src/main/scala/org/apache/spark/examples/SparkPageRank.scala
----------------------------------------------------------------------
diff --git a/examples/src/main/scala/org/apache/spark/examples/SparkPageRank.scala b/examples/src/main/scala/org/apache/spark/examples/SparkPageRank.scala
index 8d092b6..bd7894f 100644
--- a/examples/src/main/scala/org/apache/spark/examples/SparkPageRank.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/SparkPageRank.scala
@@ -51,7 +51,7 @@ object SparkPageRank {
     showWarning()
 
     val sparkConf = new SparkConf().setAppName("PageRank")
-    val iters = if (args.length > 0) args(1).toInt else 10
+    val iters = if (args.length > 1) args(1).toInt else 10
     val ctx = new SparkContext(sparkConf)
     val lines = ctx.textFile(args(0), 1)
     val links = lines.map{ s =>
spark git commit: [SPARK-7911] [MLLIB] A workaround for VectorUDT serialize (or deserialize) being called multiple times
Repository: spark
Updated Branches:
  refs/heads/branch-1.4 ab62d73dd -> 7b5dffb80

[SPARK-7911] [MLLIB] A workaround for VectorUDT serialize (or deserialize) being called multiple times

~~A PythonUDT shouldn't be serialized into external Scala types in PythonRDD. I'm not sure whether this should fix one of the bugs related to SQL UDT/UDF in PySpark.~~ The fix above didn't work, so I added a workaround: if a Python UDF is applied to a Python UDT, this will put the Python SQL types as inputs. Still incorrect, but at least it doesn't throw exceptions on the Scala side. davies harsha2010

Author: Xiangrui Meng <m...@databricks.com>

Closes #6442 from mengxr/SPARK-7903 and squashes the following commits:

c257d2a [Xiangrui Meng] add a workaround for VectorUDT

(cherry picked from commit 530efe3e80c62b25c869b85167e00330eb1ddea6)
Signed-off-by: Xiangrui Meng <m...@databricks.com>

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7b5dffb8
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7b5dffb8
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7b5dffb8

Branch: refs/heads/branch-1.4
Commit: 7b5dffb80288cb491cd9de9da653a78d800be55b
Parents: ab62d73
Author: Xiangrui Meng <m...@databricks.com>
Authored: Thu May 28 12:03:46 2015 -0700
Committer: Xiangrui Meng <m...@databricks.com>
Committed: Thu May 28 12:03:55 2015 -0700

----------------------------------------------------------------------
 .../org/apache/spark/mllib/linalg/Vectors.scala | 19 ++-
 1 file changed, 14 insertions(+), 5 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/7b5dffb8/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala
index f6bcdf8..2ffa497 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala
@@ -176,27 +176,31 @@ private[spark] class VectorUDT extends UserDefinedType[Vector] {
   }
 
   override def serialize(obj: Any): Row = {
-    val row = new GenericMutableRow(4)
     obj match {
       case SparseVector(size, indices, values) =>
+        val row = new GenericMutableRow(4)
         row.setByte(0, 0)
         row.setInt(1, size)
         row.update(2, indices.toSeq)
         row.update(3, values.toSeq)
+        row
       case DenseVector(values) =>
+        val row = new GenericMutableRow(4)
         row.setByte(0, 1)
         row.setNullAt(1)
         row.setNullAt(2)
         row.update(3, values.toSeq)
+        row
+      // TODO: There are bugs in UDT serialization because we don't have a clear separation between
+      // TODO: internal SQL types and language specific types (including UDT). UDT serialize and
+      // TODO: deserialize may get called twice. See SPARK-7186.
+      case row: Row =>
+        row
     }
-    row
   }
 
   override def deserialize(datum: Any): Vector = {
     datum match {
-      // TODO: something wrong with UDT serialization
-      case v: Vector =>
-        v
       case row: Row =>
         require(row.length == 4,
           s"VectorUDT.deserialize given row with length ${row.length} but requires length == 4")
@@ -211,6 +215,11 @@ private[spark] class VectorUDT extends UserDefinedType[Vector] {
             val values = row.getAs[Iterable[Double]](3).toArray
             new DenseVector(values)
         }
+      // TODO: There are bugs in UDT serialization because we don't have a clear separation between
+      // TODO: internal SQL types and language specific types (including UDT). UDT serialize and
+      // TODO: deserialize may get called twice. See SPARK-7186.
+      case v: Vector =>
+        v
     }
   }
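The essence of the workaround above is making both conversion directions tolerant of a second application: each side also pattern-matches on the already-converted representation and passes it through unchanged. A minimal standalone sketch of that idempotence pattern (plain Scala; Vec and MyRow are stand-in types, not the real MLlib Vector or SQL Row):

sealed trait Datum
final case class Vec(values: Vector[Double]) extends Datum
final case class MyRow(fields: Vector[Any]) extends Datum

object VecSerde {
  // Serialize passes an already-serialized row straight through, so a second
  // call is a no-op instead of a MatchError.
  def serialize(obj: Datum): MyRow = obj match {
    case Vec(values) => MyRow(values)
    case row: MyRow  => row // already serialized: return as is
  }

  // Deserialize likewise tolerates an already-deserialized vector.
  def deserialize(datum: Datum): Vec = datum match {
    case MyRow(fields) => Vec(fields.map(_.asInstanceOf[Double]))
    case v: Vec        => v // already deserialized: return as is
  }

  def main(args: Array[String]): Unit = {
    val v = Vec(Vector(1.0, 2.0))
    // Applying a conversion twice yields the same value as applying it once.
    assert(serialize(serialize(v)) == serialize(v))
    assert(deserialize(deserialize(MyRow(Vector(1.0, 2.0)))) == deserialize(MyRow(Vector(1.0, 2.0))))
    println("serialize/deserialize are idempotent on converted values")
  }
}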
spark git commit: [SPARK-7911] [MLLIB] A workaround for VectorUDT serialize (or deserialize) being called multiple times
Repository: spark
Updated Branches:
  refs/heads/master 000df2f0d -> 530efe3e8

[SPARK-7911] [MLLIB] A workaround for VectorUDT serialize (or deserialize) being called multiple times

~~A PythonUDT shouldn't be serialized into external Scala types in PythonRDD. I'm not sure whether this should fix one of the bugs related to SQL UDT/UDF in PySpark.~~ The fix above didn't work, so I added a workaround: if a Python UDF is applied to a Python UDT, this will put the Python SQL types as inputs. Still incorrect, but at least it doesn't throw exceptions on the Scala side. davies harsha2010

Author: Xiangrui Meng <m...@databricks.com>

Closes #6442 from mengxr/SPARK-7903 and squashes the following commits:

c257d2a [Xiangrui Meng] add a workaround for VectorUDT

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/530efe3e
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/530efe3e
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/530efe3e

Branch: refs/heads/master
Commit: 530efe3e80c62b25c869b85167e00330eb1ddea6
Parents: 000df2f
Author: Xiangrui Meng <m...@databricks.com>
Authored: Thu May 28 12:03:46 2015 -0700
Committer: Xiangrui Meng <m...@databricks.com>
Committed: Thu May 28 12:03:46 2015 -0700

----------------------------------------------------------------------
 .../org/apache/spark/mllib/linalg/Vectors.scala | 19 ++-
 1 file changed, 14 insertions(+), 5 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/530efe3e/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala
index f6bcdf8..2ffa497 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala
@@ -176,27 +176,31 @@ private[spark] class VectorUDT extends UserDefinedType[Vector] {
   }
 
   override def serialize(obj: Any): Row = {
-    val row = new GenericMutableRow(4)
     obj match {
       case SparseVector(size, indices, values) =>
+        val row = new GenericMutableRow(4)
         row.setByte(0, 0)
         row.setInt(1, size)
         row.update(2, indices.toSeq)
         row.update(3, values.toSeq)
+        row
      case DenseVector(values) =>
+        val row = new GenericMutableRow(4)
         row.setByte(0, 1)
         row.setNullAt(1)
         row.setNullAt(2)
         row.update(3, values.toSeq)
+        row
+      // TODO: There are bugs in UDT serialization because we don't have a clear separation between
+      // TODO: internal SQL types and language specific types (including UDT). UDT serialize and
+      // TODO: deserialize may get called twice. See SPARK-7186.
+      case row: Row =>
+        row
     }
-    row
   }
 
   override def deserialize(datum: Any): Vector = {
     datum match {
-      // TODO: something wrong with UDT serialization
-      case v: Vector =>
-        v
       case row: Row =>
         require(row.length == 4,
           s"VectorUDT.deserialize given row with length ${row.length} but requires length == 4")
@@ -211,6 +215,11 @@ private[spark] class VectorUDT extends UserDefinedType[Vector] {
             val values = row.getAs[Iterable[Double]](3).toArray
             new DenseVector(values)
         }
+      // TODO: There are bugs in UDT serialization because we don't have a clear separation between
+      // TODO: internal SQL types and language specific types (including UDT). UDT serialize and
+      // TODO: deserialize may get called twice. See SPARK-7186.
+      case v: Vector =>
+        v
     }
   }
spark git commit: [SPARK-7853] [SQL] Fix HiveContext in Spark Shell
Repository: spark
Updated Branches:
  refs/heads/master 0077af22c -> 572b62caf

[SPARK-7853] [SQL] Fix HiveContext in Spark Shell

https://issues.apache.org/jira/browse/SPARK-7853

This fixes the problem introduced by my change in https://github.com/apache/spark/pull/6435, which caused HiveContext creation to fail in the Spark shell because of a class loader issue.

Author: Yin Huai <yh...@databricks.com>

Closes #6459 from yhuai/SPARK-7853 and squashes the following commits:

37ad33e [Yin Huai] Do not use hiveQlTable at all.
47cdb6d [Yin Huai] Move hiveconf.set to the end of setConf.
005649b [Yin Huai] Update comment.
35d86f3 [Yin Huai] Access TTable directly to make sure Hive will not internally use any metastore utility functions.
3737766 [Yin Huai] Recursively find all jars.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/572b62ca
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/572b62ca
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/572b62ca

Branch: refs/heads/master
Commit: 572b62cafe4bc7b1d464c9dcfb449c9d53456826
Parents: 0077af2
Author: Yin Huai <yh...@databricks.com>
Authored: Thu May 28 17:12:30 2015 -0700
Committer: Yin Huai <yh...@databricks.com>
Committed: Thu May 28 17:12:30 2015 -0700

----------------------------------------------------------------------
 .../org/apache/spark/sql/hive/HiveContext.scala | 35 +++-
 .../spark/sql/hive/HiveMetastoreCatalog.scala   | 12 +++
 2 files changed, 25 insertions(+), 22 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/572b62ca/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
index 9ab98fd..2ed71d3 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
@@ -189,24 +189,22 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
           s"Specify a vaild path to the correct hive jars using $HIVE_METASTORE_JARS " +
           s"or change $HIVE_METASTORE_VERSION to $hiveExecutionVersion.")
       }
-      // We recursively add all jars in the class loader chain,
-      // starting from the given urlClassLoader.
-      def addJars(urlClassLoader: URLClassLoader): Array[URL] = {
-        val jarsInParent = urlClassLoader.getParent match {
-          case parent: URLClassLoader => addJars(parent)
-          case other => Array.empty[URL]
-        }
-        urlClassLoader.getURLs ++ jarsInParent
+      // We recursively find all jars in the class loader chain,
+      // starting from the given classLoader.
+      def allJars(classLoader: ClassLoader): Array[URL] = classLoader match {
+        case null => Array.empty[URL]
+        case urlClassLoader: URLClassLoader =>
+          urlClassLoader.getURLs ++ allJars(urlClassLoader.getParent)
+        case other => allJars(other.getParent)
       }
 
-      val jars = Utils.getContextOrSparkClassLoader match {
-        case urlClassLoader: URLClassLoader => addJars(urlClassLoader)
-        case other =>
-          throw new IllegalArgumentException(
-            "Unable to locate hive jars to connect to metastore " +
-            s"using classloader ${other.getClass.getName}. " +
-            "Please set spark.sql.hive.metastore.jars")
+      val classLoader = Utils.getContextOrSparkClassLoader
+      val jars = allJars(classLoader)
+      if (jars.length == 0) {
+        throw new IllegalArgumentException(
+          "Unable to locate hive jars to connect to metastore. " +
+          "Please set spark.sql.hive.metastore.jars.")
       }
 
       logInfo(
@@ -356,9 +354,14 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
 
   override def setConf(key: String, value: String): Unit = {
     super.setConf(key, value)
-    hiveconf.set(key, value)
     executionHive.runSqlHive(s"SET $key=$value")
     metadataHive.runSqlHive(s"SET $key=$value")
+    // If users put any Spark SQL setting in the spark conf (e.g. spark-defaults.conf),
+    // this setConf will be called in the constructor of the SQLContext.
+    // Also, calling hiveconf will create a default session containing a HiveConf, which
+    // will interfer with the creation of executionHive (which is a lazy val). So,
+    // we put hiveconf.set at the end of this method.
+    hiveconf.set(key, value)
   }
 
   /* A catalyst metadata catalog that points to the Hive Metastore. */

http://git-wip-us.apache.org/repos/asf/spark/blob/572b62ca/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveMetastoreCatalog.scala
----------------------------------------------------------------------
diff --git
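The heart of the fix is the allJars helper, which walks the entire classloader chain instead of failing when the context loader itself is not a URLClassLoader. The pattern can be exercised on its own; a small self-contained sketch (the JarLister wrapper is an illustrative name of mine, only allJars mirrors the patch):

import java.net.{URL, URLClassLoader}

object JarLister {
  // Recursively collect URLs from the whole classloader chain. A null loader
  // (the bootstrap loader) terminates the recursion; loaders that are not
  // URLClassLoaders contribute nothing themselves, but their parents are
  // still searched.
  def allJars(classLoader: ClassLoader): Array[URL] = classLoader match {
    case null => Array.empty[URL]
    case urlClassLoader: URLClassLoader =>
      urlClassLoader.getURLs ++ allJars(urlClassLoader.getParent)
    case other => allJars(other.getParent)
  }

  def main(args: Array[String]): Unit = {
    // Print every jar/classes URL visible from the current thread's loader.
    allJars(Thread.currentThread().getContextClassLoader).foreach(println)
  }
}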
spark git commit: [SPARK-7853] [SQL] Fix HiveContext in Spark Shell
Repository: spark
Updated Branches:
  refs/heads/branch-1.4 9c2c6b4a6 -> 8f4a86eaa

[SPARK-7853] [SQL] Fix HiveContext in Spark Shell

https://issues.apache.org/jira/browse/SPARK-7853

This fixes the problem introduced by my change in https://github.com/apache/spark/pull/6435, which caused HiveContext creation to fail in the Spark shell because of a class loader issue.

Author: Yin Huai <yh...@databricks.com>

Closes #6459 from yhuai/SPARK-7853 and squashes the following commits:

37ad33e [Yin Huai] Do not use hiveQlTable at all.
47cdb6d [Yin Huai] Move hiveconf.set to the end of setConf.
005649b [Yin Huai] Update comment.
35d86f3 [Yin Huai] Access TTable directly to make sure Hive will not internally use any metastore utility functions.
3737766 [Yin Huai] Recursively find all jars.

(cherry picked from commit 572b62cafe4bc7b1d464c9dcfb449c9d53456826)
Signed-off-by: Yin Huai <yh...@databricks.com>

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/8f4a86ea
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/8f4a86ea
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/8f4a86ea

Branch: refs/heads/branch-1.4
Commit: 8f4a86eaa1cad9a2a7607fd5446105c93e5e424e
Parents: 9c2c6b4
Author: Yin Huai <yh...@databricks.com>
Authored: Thu May 28 17:12:30 2015 -0700
Committer: Yin Huai <yh...@databricks.com>
Committed: Thu May 28 17:12:38 2015 -0700

----------------------------------------------------------------------
 .../org/apache/spark/sql/hive/HiveContext.scala | 35 +++-
 .../spark/sql/hive/HiveMetastoreCatalog.scala   | 12 +++
 2 files changed, 25 insertions(+), 22 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/8f4a86ea/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
index 9ab98fd..2ed71d3 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
@@ -189,24 +189,22 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
           s"Specify a vaild path to the correct hive jars using $HIVE_METASTORE_JARS " +
           s"or change $HIVE_METASTORE_VERSION to $hiveExecutionVersion.")
       }
-      // We recursively add all jars in the class loader chain,
-      // starting from the given urlClassLoader.
-      def addJars(urlClassLoader: URLClassLoader): Array[URL] = {
-        val jarsInParent = urlClassLoader.getParent match {
-          case parent: URLClassLoader => addJars(parent)
-          case other => Array.empty[URL]
-        }
-        urlClassLoader.getURLs ++ jarsInParent
+      // We recursively find all jars in the class loader chain,
+      // starting from the given classLoader.
+      def allJars(classLoader: ClassLoader): Array[URL] = classLoader match {
+        case null => Array.empty[URL]
+        case urlClassLoader: URLClassLoader =>
+          urlClassLoader.getURLs ++ allJars(urlClassLoader.getParent)
+        case other => allJars(other.getParent)
       }
 
-      val jars = Utils.getContextOrSparkClassLoader match {
-        case urlClassLoader: URLClassLoader => addJars(urlClassLoader)
-        case other =>
-          throw new IllegalArgumentException(
-            "Unable to locate hive jars to connect to metastore " +
-            s"using classloader ${other.getClass.getName}. " +
-            "Please set spark.sql.hive.metastore.jars")
+      val classLoader = Utils.getContextOrSparkClassLoader
+      val jars = allJars(classLoader)
+      if (jars.length == 0) {
+        throw new IllegalArgumentException(
+          "Unable to locate hive jars to connect to metastore. " +
+          "Please set spark.sql.hive.metastore.jars.")
      }
 
       logInfo(
@@ -356,9 +354,14 @@ class HiveContext(sc: SparkContext) extends SQLContext(sc) {
 
   override def setConf(key: String, value: String): Unit = {
     super.setConf(key, value)
-    hiveconf.set(key, value)
     executionHive.runSqlHive(s"SET $key=$value")
     metadataHive.runSqlHive(s"SET $key=$value")
+    // If users put any Spark SQL setting in the spark conf (e.g. spark-defaults.conf),
+    // this setConf will be called in the constructor of the SQLContext.
+    // Also, calling hiveconf will create a default session containing a HiveConf, which
+    // will interfer with the creation of executionHive (which is a lazy val). So,
+    // we put hiveconf.set at the end of this method.
+    hiveconf.set(key, value)
   }
 
   /* A catalyst metadata catalog that points to the Hive Metastore. */
spark git commit: [SPARK-7577] [ML] [DOC] add bucketizer doc
Repository: spark
Updated Branches:
  refs/heads/master 572b62caf -> 1bd63e82f

[SPARK-7577] [ML] [DOC] add bucketizer doc

CC jkbradley

Author: Xusen Yin <yinxu...@gmail.com>

Closes #6451 from yinxusen/SPARK-7577 and squashes the following commits:

e2dc32e [Xusen Yin] rename colums
e350e49 [Xusen Yin] add all demos
006ddf1 [Xusen Yin] add java test
3238481 [Xusen Yin] add bucketizer

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1bd63e82
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1bd63e82
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1bd63e82

Branch: refs/heads/master
Commit: 1bd63e82fdb6ee57c61051430d63685b801df016
Parents: 572b62c
Author: Xusen Yin <yinxu...@gmail.com>
Authored: Thu May 28 17:30:12 2015 -0700
Committer: Joseph K. Bradley <jos...@databricks.com>
Committed: Thu May 28 17:30:12 2015 -0700

----------------------------------------------------------------------
 docs/ml-features.md                             | 86 ++++++++++++++++
 .../spark/ml/feature/JavaBucketizerSuite.java   | 80 ++++++++++++++
 2 files changed, 166 insertions(+)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/1bd63e82/docs/ml-features.md
----------------------------------------------------------------------
diff --git a/docs/ml-features.md b/docs/ml-features.md
index efe9b3b..d7851a5 100644
--- a/docs/ml-features.md
+++ b/docs/ml-features.md
@@ -789,6 +789,92 @@ scaledData = scalerModel.transform(dataFrame)
 </div>
 </div>
 
+## Bucketizer
+
+`Bucketizer` transforms a column of continuous features to a column of feature buckets, where the buckets are specified by users. It takes a parameter:
+
+* `splits`: Parameter for mapping continuous features into buckets. With n+1 splits, there are n buckets. A bucket defined by splits x,y holds values in the range [x,y) except the last bucket, which also includes y. Splits should be strictly increasing. Values at -inf, inf must be explicitly provided to cover all Double values; Otherwise, values outside the splits specified will be treated as errors. Two examples of `splits` are `Array(Double.NegativeInfinity, 0.0, 1.0, Double.PositiveInfinity)` and `Array(0.0, 1.0, 2.0)`.
+
+Note that if you have no idea of the upper bound and lower bound of the targeted column, you would better add the `Double.NegativeInfinity` and `Double.PositiveInfinity` as the bounds of your splits to prevent a potenial out of Bucketizer bounds exception.
+
+Note also that the splits that you provided have to be in strictly increasing order, i.e. `s0 < s1 < s2 < ... < sn`.
+
+More details can be found in the API docs for [Bucketizer](api/scala/index.html#org.apache.spark.ml.feature.Bucketizer).
+
+The following example demonstrates how to bucketize a column of `Double`s into another index-wised column.
+
+<div class="codetabs">
+<div data-lang="scala">
+{% highlight scala %}
+import org.apache.spark.ml.feature.Bucketizer
+import org.apache.spark.sql.DataFrame
+
+val splits = Array(Double.NegativeInfinity, -0.5, 0.0, 0.5, Double.PositiveInfinity)
+
+val data = Array(-0.5, -0.3, 0.0, 0.2)
+val dataFrame = sqlContext.createDataFrame(data.map(Tuple1.apply)).toDF("features")
+
+val bucketizer = new Bucketizer()
+  .setInputCol("features")
+  .setOutputCol("bucketedFeatures")
+  .setSplits(splits)
+
+// Transform original data into its bucket index.
+val bucketedData = bucketizer.transform(dataFrame)
+{% endhighlight %}
+</div>
+
+<div data-lang="java">
+{% highlight java %}
+import com.google.common.collect.Lists;
+
+import org.apache.spark.sql.DataFrame;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.RowFactory;
+import org.apache.spark.sql.types.DataTypes;
+import org.apache.spark.sql.types.Metadata;
+import org.apache.spark.sql.types.StructField;
+import org.apache.spark.sql.types.StructType;
+
+double[] splits = {Double.NEGATIVE_INFINITY, -0.5, 0.0, 0.5, Double.POSITIVE_INFINITY};
+
+JavaRDD<Row> data = jsc.parallelize(Lists.newArrayList(
+  RowFactory.create(-0.5),
+  RowFactory.create(-0.3),
+  RowFactory.create(0.0),
+  RowFactory.create(0.2)
+));
+StructType schema = new StructType(new StructField[] {
+  new StructField("features", DataTypes.DoubleType, false, Metadata.empty())
+});
+DataFrame dataFrame = jsql.createDataFrame(data, schema);
+
+Bucketizer bucketizer = new Bucketizer()
+  .setInputCol("features")
+  .setOutputCol("bucketedFeatures")
+  .setSplits(splits);
+
+// Transform original data into its bucket index.
+DataFrame bucketedData = bucketizer.transform(dataFrame);
+{% endhighlight %}
+</div>
+
+<div data-lang="python">
+{% highlight python %}
+from pyspark.ml.feature import Bucketizer
+
+splits = [-float("inf"), -0.5, 0.0, 0.5, float("inf")]
+
+data = [(-0.5,), (-0.3,), (0.0,), (0.2,)]
+dataFrame = sqlContext.createDataFrame(data, ["features"])
+
+bucketizer = Bucketizer(splits=splits, inputCol="features",
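To make the `[x, y)` semantics in the new doc concrete: with splits (-inf, -0.5, 0.0, 0.5, inf), the value -0.5 lands in bucket 1 and 0.2 in bucket 2. A plain-Scala sketch of the documented rule (an illustration only, not the MLlib implementation):

object BucketRule {
  // Bucket i covers [splits(i), splits(i + 1)); per the documented semantics
  // the last bucket additionally includes its upper bound.
  def bucket(splits: Array[Double], x: Double): Int = {
    require(splits.sliding(2).forall(p => p(0) < p(1)), "splits must be strictly increasing")
    require(x >= splits.head && x <= splits.last, "value outside the provided splits")
    if (x == splits.last) splits.length - 2
    else splits.lastIndexWhere(_ <= x)
  }

  def main(args: Array[String]): Unit = {
    val splits = Array(Double.NegativeInfinity, -0.5, 0.0, 0.5, Double.PositiveInfinity)
    // Prints: -0.5 -> bucket 1, -0.3 -> bucket 1, 0.0 -> bucket 2, 0.2 -> bucket 2
    Seq(-0.5, -0.3, 0.0, 0.2).foreach(x => println(s"$x -> bucket ${bucket(splits, x)}"))
  }
}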
spark git commit: [SPARK-7577] [ML] [DOC] add bucketizer doc
Repository: spark
Updated Branches:
  refs/heads/branch-1.4 8f4a86eaa -> 7bb445a38

[SPARK-7577] [ML] [DOC] add bucketizer doc

CC jkbradley

Author: Xusen Yin <yinxu...@gmail.com>

Closes #6451 from yinxusen/SPARK-7577 and squashes the following commits:

e2dc32e [Xusen Yin] rename colums
e350e49 [Xusen Yin] add all demos
006ddf1 [Xusen Yin] add java test
3238481 [Xusen Yin] add bucketizer

(cherry picked from commit 1bd63e82fdb6ee57c61051430d63685b801df016)
Signed-off-by: Joseph K. Bradley <jos...@databricks.com>

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7bb445a3
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7bb445a3
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7bb445a3

Branch: refs/heads/branch-1.4
Commit: 7bb445a38ca37e72d0b11ad1c4448632b679eda6
Parents: 8f4a86e
Author: Xusen Yin <yinxu...@gmail.com>
Authored: Thu May 28 17:30:12 2015 -0700
Committer: Joseph K. Bradley <jos...@databricks.com>
Committed: Thu May 28 17:30:33 2015 -0700

----------------------------------------------------------------------
 docs/ml-features.md                             | 86 ++++++++++++++++
 .../spark/ml/feature/JavaBucketizerSuite.java   | 80 ++++++++++++++
 2 files changed, 166 insertions(+)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/7bb445a3/docs/ml-features.md
----------------------------------------------------------------------
diff --git a/docs/ml-features.md b/docs/ml-features.md
index efe9b3b..d7851a5 100644
--- a/docs/ml-features.md
+++ b/docs/ml-features.md
@@ -789,6 +789,92 @@ scaledData = scalerModel.transform(dataFrame)
 </div>
 </div>
 
+## Bucketizer
+
+`Bucketizer` transforms a column of continuous features to a column of feature buckets, where the buckets are specified by users. It takes a parameter:
+
+* `splits`: Parameter for mapping continuous features into buckets. With n+1 splits, there are n buckets. A bucket defined by splits x,y holds values in the range [x,y) except the last bucket, which also includes y. Splits should be strictly increasing. Values at -inf, inf must be explicitly provided to cover all Double values; Otherwise, values outside the splits specified will be treated as errors. Two examples of `splits` are `Array(Double.NegativeInfinity, 0.0, 1.0, Double.PositiveInfinity)` and `Array(0.0, 1.0, 2.0)`.
+
+Note that if you have no idea of the upper bound and lower bound of the targeted column, you would better add the `Double.NegativeInfinity` and `Double.PositiveInfinity` as the bounds of your splits to prevent a potenial out of Bucketizer bounds exception.
+
+Note also that the splits that you provided have to be in strictly increasing order, i.e. `s0 < s1 < s2 < ... < sn`.
+
+More details can be found in the API docs for [Bucketizer](api/scala/index.html#org.apache.spark.ml.feature.Bucketizer).
+
+The following example demonstrates how to bucketize a column of `Double`s into another index-wised column.
+
+<div class="codetabs">
+<div data-lang="scala">
+{% highlight scala %}
+import org.apache.spark.ml.feature.Bucketizer
+import org.apache.spark.sql.DataFrame
+
+val splits = Array(Double.NegativeInfinity, -0.5, 0.0, 0.5, Double.PositiveInfinity)
+
+val data = Array(-0.5, -0.3, 0.0, 0.2)
+val dataFrame = sqlContext.createDataFrame(data.map(Tuple1.apply)).toDF("features")
+
+val bucketizer = new Bucketizer()
+  .setInputCol("features")
+  .setOutputCol("bucketedFeatures")
+  .setSplits(splits)
+
+// Transform original data into its bucket index.
+val bucketedData = bucketizer.transform(dataFrame)
+{% endhighlight %}
+</div>
+
+<div data-lang="java">
+{% highlight java %}
+import com.google.common.collect.Lists;
+
+import org.apache.spark.sql.DataFrame;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.RowFactory;
+import org.apache.spark.sql.types.DataTypes;
+import org.apache.spark.sql.types.Metadata;
+import org.apache.spark.sql.types.StructField;
+import org.apache.spark.sql.types.StructType;
+
+double[] splits = {Double.NEGATIVE_INFINITY, -0.5, 0.0, 0.5, Double.POSITIVE_INFINITY};
+
+JavaRDD<Row> data = jsc.parallelize(Lists.newArrayList(
+  RowFactory.create(-0.5),
+  RowFactory.create(-0.3),
+  RowFactory.create(0.0),
+  RowFactory.create(0.2)
+));
+StructType schema = new StructType(new StructField[] {
+  new StructField("features", DataTypes.DoubleType, false, Metadata.empty())
+});
+DataFrame dataFrame = jsql.createDataFrame(data, schema);
+
+Bucketizer bucketizer = new Bucketizer()
+  .setInputCol("features")
+  .setOutputCol("bucketedFeatures")
+  .setSplits(splits);
+
+// Transform original data into its bucket index.
+DataFrame bucketedData = bucketizer.transform(dataFrame);
+{% endhighlight %}
+</div>
+
+<div data-lang="python">
+{% highlight python %}
+from pyspark.ml.feature import Bucketizer
+
+splits = [-float("inf"), -0.5, 0.0, 0.5, float("inf")]
+
+data = [(-0.5,), (-0.3,), (0.0,), (0.2,)]
+dataFrame
spark git commit: [SPARK-7927] [MLLIB] Enforce whitespace for more tokens in style checker
Repository: spark
Updated Branches:
  refs/heads/branch-1.4 3479e6a12 -> 0c0511506

[SPARK-7927] [MLLIB] Enforce whitespace for more tokens in style checker

rxin

Author: Xiangrui Meng <m...@databricks.com>

Closes #6481 from mengxr/mllib-scalastyle and squashes the following commits:

3ca4d61 [Xiangrui Meng] revert scalastyle config
30961ba [Xiangrui Meng] adjust spaces in mllib/test
571b5c5 [Xiangrui Meng] fix spaces in mllib

(cherry picked from commit 04616b1a2f5244710b07ecbb404384ded893292c)
Signed-off-by: Reynold Xin <r...@databricks.com>

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0c051150
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0c051150
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0c051150

Branch: refs/heads/branch-1.4
Commit: 0c05115063df39e6058c9c8ea90dd10724a7366d
Parents: 3479e6a
Author: Xiangrui Meng <m...@databricks.com>
Authored: Thu May 28 20:09:12 2015 -0700
Committer: Reynold Xin <r...@databricks.com>
Committed: Thu May 28 20:09:21 2015 -0700

----------------------------------------------------------------------
 .../spark/ml/classification/OneVsRest.scala        | 10 +-
 .../scala/org/apache/spark/ml/tree/Node.scala      |  4 ++--
 .../spark/mllib/api/python/PythonMLLibAPI.scala    |  8
 .../spark/mllib/clustering/GaussianMixture.scala   |  2 +-
 .../spark/mllib/clustering/LDAOptimizer.scala      |  6 +++---
 .../org/apache/spark/mllib/feature/IDF.scala       |  2 +-
 .../spark/mllib/feature/StandardScaler.scala       |  2 +-
 .../org/apache/spark/mllib/feature/Word2Vec.scala  |  6 +++---
 .../mllib/linalg/distributed/RowMatrix.scala       |  2 +-
 .../apache/spark/mllib/random/RandomRDDs.scala     |  2 +-
 .../mllib/regression/IsotonicRegression.scala      |  2 +-
 .../mllib/stat/MultivariateOnlineSummarizer.scala  |  2 +-
 .../apache/spark/mllib/stat/test/ChiSqTest.scala   |  2 +-
 .../apache/spark/mllib/tree/DecisionTree.scala     |  4 ++--
 .../spark/mllib/tree/GradientBoostedTrees.scala    | 12 ++--
 .../apache/spark/mllib/tree/RandomForest.scala     |  2 +-
 .../org/apache/spark/mllib/tree/model/Node.scala   |  8
 .../apache/spark/mllib/util/MFDataGenerator.scala  |  7 +++
 .../apache/spark/ml/feature/Word2VecSuite.scala    |  6 +++---
 .../spark/ml/tuning/CrossValidatorSuite.scala      | 12 +---
 .../mllib/api/python/PythonMLLibAPISuite.scala     |  2 +-
 .../mllib/classification/NaiveBayesSuite.scala     |  2 +-
 .../spark/mllib/classification/SVMSuite.scala      | 18 +-
 .../spark/mllib/clustering/KMeansSuite.scala       |  4 ++--
 .../PowerIterationClusteringSuite.scala            |  2 ++
 .../mllib/evaluation/RegressionMetricsSuite.scala  |  4 ++--
 .../spark/mllib/feature/StandardScalerSuite.scala  |  2 +-
 .../linalg/distributed/BlockMatrixSuite.scala      |  2 ++
 .../mllib/optimization/GradientDescentSuite.scala  |  2 +-
 .../spark/mllib/optimization/NNLSSuite.scala       |  2 ++
 .../spark/mllib/regression/LassoSuite.scala        | 10 ++
 .../spark/mllib/stat/CorrelationSuite.scala        |  6 +-
 .../apache/spark/mllib/util/MLUtilsSuite.scala     |  2 +-
 33 files changed, 88 insertions(+), 71 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/0c051150/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala
index 36735cd..b8c7f3c 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala
@@ -70,7 +70,7 @@ private[ml] trait OneVsRestParams extends PredictorParams {
 final class OneVsRestModel private[ml] (
     override val uid: String,
     labelMetadata: Metadata,
-    val models: Array[_ <: ClassificationModel[_,_]])
+    val models: Array[_ <: ClassificationModel[_, _]])
   extends Model[OneVsRestModel] with OneVsRestParams {
 
   override def transformSchema(schema: StructType): StructType = {
@@ -104,17 +104,17 @@ final class OneVsRestModel private[ml] (
 
     // add temporary column to store intermediate scores and update
     val tmpColName = "mbc$tmp" + UUID.randomUUID().toString
-    val update: (Map[Int, Double], Vector) => Map[Int, Double] =
+    val update: (Map[Int, Double], Vector) => Map[Int, Double] =
       (predictions: Map[Int, Double], prediction: Vector) => {
        predictions + ((index, prediction(1)))
      }
     val updateUdf = callUDF(update, mapType, col(accColName), col(rawPredictionCol))
-    val transformedDataset = model.transform(df).select(columns:_*)
+    val transformedDataset = model.transform(df).select(columns : _*)
     val updatedDataset = transformedDataset.withColumn(tmpColName, updateUdf)
     val newColumns = origCols ++
spark git commit: [SPARK-7927] [MLLIB] Enforce whitespace for more tokens in style checker
Repository: spark
Updated Branches:
  refs/heads/master 9b692bfdf -> 04616b1a2

[SPARK-7927] [MLLIB] Enforce whitespace for more tokens in style checker

rxin

Author: Xiangrui Meng <m...@databricks.com>

Closes #6481 from mengxr/mllib-scalastyle and squashes the following commits:

3ca4d61 [Xiangrui Meng] revert scalastyle config
30961ba [Xiangrui Meng] adjust spaces in mllib/test
571b5c5 [Xiangrui Meng] fix spaces in mllib

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/04616b1a
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/04616b1a
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/04616b1a

Branch: refs/heads/master
Commit: 04616b1a2f5244710b07ecbb404384ded893292c
Parents: 9b692bf
Author: Xiangrui Meng <m...@databricks.com>
Authored: Thu May 28 20:09:12 2015 -0700
Committer: Reynold Xin <r...@databricks.com>
Committed: Thu May 28 20:09:12 2015 -0700

----------------------------------------------------------------------
 .../spark/ml/classification/OneVsRest.scala        | 10 +-
 .../scala/org/apache/spark/ml/tree/Node.scala      |  4 ++--
 .../spark/mllib/api/python/PythonMLLibAPI.scala    |  8
 .../spark/mllib/clustering/GaussianMixture.scala   |  2 +-
 .../spark/mllib/clustering/LDAOptimizer.scala      |  6 +++---
 .../org/apache/spark/mllib/feature/IDF.scala       |  2 +-
 .../spark/mllib/feature/StandardScaler.scala       |  2 +-
 .../org/apache/spark/mllib/feature/Word2Vec.scala  |  6 +++---
 .../mllib/linalg/distributed/RowMatrix.scala       |  2 +-
 .../apache/spark/mllib/random/RandomRDDs.scala     |  2 +-
 .../mllib/regression/IsotonicRegression.scala      |  2 +-
 .../mllib/stat/MultivariateOnlineSummarizer.scala  |  2 +-
 .../apache/spark/mllib/stat/test/ChiSqTest.scala   |  2 +-
 .../apache/spark/mllib/tree/DecisionTree.scala     |  4 ++--
 .../spark/mllib/tree/GradientBoostedTrees.scala    | 12 ++--
 .../apache/spark/mllib/tree/RandomForest.scala     |  2 +-
 .../org/apache/spark/mllib/tree/model/Node.scala   |  8
 .../apache/spark/mllib/util/MFDataGenerator.scala  |  7 +++
 .../apache/spark/ml/feature/Word2VecSuite.scala    |  6 +++---
 .../spark/ml/tuning/CrossValidatorSuite.scala      | 12 +---
 .../mllib/api/python/PythonMLLibAPISuite.scala     |  2 +-
 .../mllib/classification/NaiveBayesSuite.scala     |  2 +-
 .../spark/mllib/classification/SVMSuite.scala      | 18 +-
 .../spark/mllib/clustering/KMeansSuite.scala       |  4 ++--
 .../PowerIterationClusteringSuite.scala            |  2 ++
 .../mllib/evaluation/RegressionMetricsSuite.scala  |  4 ++--
 .../spark/mllib/feature/StandardScalerSuite.scala  |  2 +-
 .../linalg/distributed/BlockMatrixSuite.scala      |  2 ++
 .../mllib/optimization/GradientDescentSuite.scala  |  2 +-
 .../spark/mllib/optimization/NNLSSuite.scala       |  2 ++
 .../spark/mllib/regression/LassoSuite.scala        | 10 ++
 .../spark/mllib/stat/CorrelationSuite.scala        |  6 +-
 .../apache/spark/mllib/util/MLUtilsSuite.scala     |  2 +-
 33 files changed, 88 insertions(+), 71 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/04616b1a/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala
index 36735cd..b8c7f3c 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/classification/OneVsRest.scala
@@ -70,7 +70,7 @@ private[ml] trait OneVsRestParams extends PredictorParams {
 final class OneVsRestModel private[ml] (
     override val uid: String,
     labelMetadata: Metadata,
-    val models: Array[_ <: ClassificationModel[_,_]])
+    val models: Array[_ <: ClassificationModel[_, _]])
   extends Model[OneVsRestModel] with OneVsRestParams {
 
   override def transformSchema(schema: StructType): StructType = {
@@ -104,17 +104,17 @@ final class OneVsRestModel private[ml] (
 
     // add temporary column to store intermediate scores and update
     val tmpColName = "mbc$tmp" + UUID.randomUUID().toString
-    val update: (Map[Int, Double], Vector) => Map[Int, Double] =
+    val update: (Map[Int, Double], Vector) => Map[Int, Double] =
       (predictions: Map[Int, Double], prediction: Vector) => {
        predictions + ((index, prediction(1)))
      }
     val updateUdf = callUDF(update, mapType, col(accColName), col(rawPredictionCol))
-    val transformedDataset = model.transform(df).select(columns:_*)
+    val transformedDataset = model.transform(df).select(columns : _*)
     val updatedDataset = transformedDataset.withColumn(tmpColName, updateUdf)
     val newColumns = origCols ++
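For reference, the spacing conventions this change enforces, condensed into one small compilable sketch (illustrative names of mine, not code from the patch):

object SpacingExamples {
  // Function types get spaces around the arrow and after commas:
  // (Map[Int, Double], Int) => Map[Int, Double], not (Map[Int,Double],Int)=>...
  val update: (Map[Int, Double], Int) => Map[Int, Double] =
    (acc, k) => acc + (k -> 1.0)

  // Type bounds and type arguments are spaced: Array[_ <: Seq[_]], Map[_, _].
  def headOption(xs: Array[_ <: Seq[Int]]): Option[Seq[Int]] = xs.headOption

  // The varargs ascription is written with a space: `xs: _*`.
  def sum(xs: Int*): Int = xs.sum
  val total: Int = sum(Seq(1, 2, 3): _*)
}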
spark git commit: [SPARK-7927] whitespace fixes for SQL core.
Repository: spark
Updated Branches:
  refs/heads/master 04616b1a2 -> ff44c711a

[SPARK-7927] whitespace fixes for SQL core.

So we can enable a whitespace enforcement rule in the style checker to save code review time.

Author: Reynold Xin <r...@databricks.com>

Closes #6477 from rxin/whitespace-sql-core and squashes the following commits:

ce6e369 [Reynold Xin] Fixed tests.
6095fed [Reynold Xin] [SPARK-7927] whitespace fixes for SQL core.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ff44c711
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ff44c711
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ff44c711

Branch: refs/heads/master
Commit: ff44c711abc7ca545dfa1e836279c00fe7539c18
Parents: 04616b1
Author: Reynold Xin <r...@databricks.com>
Authored: Thu May 28 20:10:21 2015 -0700
Committer: Reynold Xin <r...@databricks.com>
Committed: Thu May 28 20:10:21 2015 -0700

----------------------------------------------------------------------
 .../scala/org/apache/spark/sql/Column.scala        |  4 +-
 .../scala/org/apache/spark/sql/DataFrame.scala     | 18
 .../org/apache/spark/sql/DataFrameHolder.scala     |  2 +-
 .../org/apache/spark/sql/GroupedData.scala         | 10 ++---
 .../scala/org/apache/spark/sql/SQLContext.scala    |  2 +-
 .../org/apache/spark/sql/SparkSQLParser.scala      | 18
 .../columnar/InMemoryColumnarTableScan.scala       |  2 +-
 .../apache/spark/sql/execution/Exchange.scala      |  2 +-
 .../spark/sql/execution/SparkStrategies.scala      |  7 ++--
 .../joins/BroadcastLeftSemiJoinHash.scala          |  2 +-
 .../sql/execution/stat/FrequentItems.scala         |  4 +-
 .../scala/org/apache/spark/sql/functions.scala     |  2 +-
 .../org/apache/spark/sql/jdbc/JDBCRDD.scala        | 44 ++--
 .../org/apache/spark/sql/json/InferSchema.scala    |  2 +-
 .../spark/sql/json/JacksonGenerator.scala          | 10 ++---
 .../org/apache/spark/sql/json/JsonRDD.scala        |  8 ++--
 .../spark/sql/parquet/ParquetConverter.scala       | 12 +++---
 .../sql/parquet/ParquetTableOperations.scala       |  4 +-
 .../apache/spark/sql/parquet/ParquetTypes.scala    |  2 +-
 .../org/apache/spark/sql/sources/ddl.scala         |  8 ++--
 .../spark/sql/ColumnExpressionSuite.scala          | 14 +++
 .../spark/sql/DataFrameAggregateSuite.scala        |  4 +-
 .../org/apache/spark/sql/DataFrameSuite.scala      | 38 -
 .../scala/org/apache/spark/sql/JoinSuite.scala     |  8 ++--
 .../org/apache/spark/sql/ListTablesSuite.scala     |  2 +-
 .../org/apache/spark/sql/SQLQuerySuite.scala       | 42 +--
 .../sql/ScalaReflectionRelationSuite.scala         |  4 +-
 .../scala/org/apache/spark/sql/TestData.scala      |  4 +-
 .../scala/org/apache/spark/sql/UDFSuite.scala      |  2 +-
 .../spark/sql/columnar/ColumnTypeSuite.scala       |  6 +--
 .../compression/DictionaryEncodingSuite.scala      |  4 +-
 .../compression/IntegralDeltaSuite.scala           |  4 +-
 .../compression/RunLengthEncodingSuite.scala       | 10 ++---
 .../org/apache/spark/sql/jdbc/JDBCSuite.scala      |  4 +-
 .../org/apache/spark/sql/json/JsonSuite.scala      |  2 +-
 .../apache/spark/sql/sources/DDLTestSuite.scala    |  5 +--
 .../spark/sql/sources/FilteredScanSuite.scala      |  2 +-
 37 files changed, 160 insertions(+), 158 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/ff44c711/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
index 6895aa1..b49b1d3 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
@@ -349,7 +349,7 @@ class Column(protected[sql] val expr: Expression) extends Logging {
    * @group expr_ops
    * @since 1.4.0
    */
-  def when(condition: Column, value: Any):Column = this.expr match {
+  def when(condition: Column, value: Any): Column = this.expr match {
     case CaseWhen(branches: Seq[Expression]) =>
       CaseWhen(branches ++ Seq(lit(condition).expr, lit(value).expr))
     case _ =>
@@ -378,7 +378,7 @@ class Column(protected[sql] val expr: Expression) extends Logging {
    * @group expr_ops
    * @since 1.4.0
    */
-  def otherwise(value: Any):Column = this.expr match {
+  def otherwise(value: Any): Column = this.expr match {
     case CaseWhen(branches: Seq[Expression]) =>
       if (branches.size % 2 == 0) {
         CaseWhen(branches :+ lit(value).expr)

http://git-wip-us.apache.org/repos/asf/spark/blob/ff44c711/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala
index f968577..e901094 100644
---
spark git commit: Remove SizeEstimator from o.a.spark package.
Repository: spark Updated Branches: refs/heads/branch-1.4 b9bdf12a1 - 9c2c6b4a6 Remove SizeEstimator from o.a.spark package. See comments on https://github.com/apache/spark/pull/3913 Author: Reynold Xin r...@databricks.com Closes #6471 from rxin/sizeestimator and squashes the following commits: c057095 [Reynold Xin] Fixed import. 2da478b [Reynold Xin] Remove SizeEstimator from o.a.spark package. (cherry picked from commit 0077af22ca5fcb2e50dcf7daa4f6804ae722bfbe) Signed-off-by: Reynold Xin r...@databricks.com Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/9c2c6b4a Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/9c2c6b4a Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/9c2c6b4a Branch: refs/heads/branch-1.4 Commit: 9c2c6b4a676ea1fdfecd9cd450d43d4081c77385 Parents: b9bdf12 Author: Reynold Xin r...@databricks.com Authored: Thu May 28 16:56:59 2015 -0700 Committer: Reynold Xin r...@databricks.com Committed: Thu May 28 16:57:06 2015 -0700 -- .../scala/org/apache/spark/SizeEstimator.scala | 44 .../org/apache/spark/util/SizeEstimator.scala | 20 +++-- 2 files changed, 17 insertions(+), 47 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/9c2c6b4a/core/src/main/scala/org/apache/spark/SizeEstimator.scala -- diff --git a/core/src/main/scala/org/apache/spark/SizeEstimator.scala b/core/src/main/scala/org/apache/spark/SizeEstimator.scala deleted file mode 100644 index 54fc3a8..000 --- a/core/src/main/scala/org/apache/spark/SizeEstimator.scala +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the License); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - *http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark - -import org.apache.spark.annotation.DeveloperApi - -/** - * Estimates the sizes of Java objects (number of bytes of memory they occupy), for use in - * memory-aware caches. - * - * Based on the following JavaWorld article: - * http://www.javaworld.com/javaworld/javaqa/2003-12/02-qa-1226-sizeof.html - */ -@DeveloperApi -object SizeEstimator { - /** - * :: DeveloperApi :: - * Estimate the number of bytes that the given object takes up on the JVM heap. The estimate - * includes space taken up by objects referenced by the given object, their references, and so on - * and so forth. - * - * This is useful for determining the amount of heap space a broadcast variable will occupy on - * each executor or the amount of space each object will take when caching objects in - * deserialized form. This is not the same as the serialized size of the object, which will - * typically be much smaller. 
- */ - @DeveloperApi - def estimate(obj: AnyRef): Long = org.apache.spark.util.SizeEstimator.estimate(obj) -} http://git-wip-us.apache.org/repos/asf/spark/blob/9c2c6b4a/core/src/main/scala/org/apache/spark/util/SizeEstimator.scala -- diff --git a/core/src/main/scala/org/apache/spark/util/SizeEstimator.scala b/core/src/main/scala/org/apache/spark/util/SizeEstimator.scala index d91c329..774f9c7 100644 --- a/core/src/main/scala/org/apache/spark/util/SizeEstimator.scala +++ b/core/src/main/scala/org/apache/spark/util/SizeEstimator.scala @@ -21,21 +21,37 @@ import java.lang.management.ManagementFactory import java.lang.reflect.{Field, Modifier} import java.util.{IdentityHashMap, Random} import java.util.concurrent.ConcurrentHashMap + import scala.collection.mutable.ArrayBuffer import scala.runtime.ScalaRunTime import org.apache.spark.Logging +import org.apache.spark.annotation.DeveloperApi import org.apache.spark.util.collection.OpenHashSet /** + * :: DeveloperApi :: * Estimates the sizes of Java objects (number of bytes of memory they occupy), for use in * memory-aware caches. * * Based on the following JavaWorld article: * http://www.javaworld.com/javaworld/javaqa/2003-12/02-qa-1226-sizeof.html */ -private[spark] object SizeEstimator extends Logging { +@DeveloperApi +object
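The move changes only the package; the entry point is unchanged. A minimal sketch of calling it from user code (the sample array is arbitrary):

    import org.apache.spark.util.SizeEstimator

    // Walks the object graph, so this approximates the deserialized
    // in-memory footprint, not the serialized size.
    val bytes: Long = SizeEstimator.estimate(new Array[Long](1024))
    println(s"estimated heap usage: $bytes bytes")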
spark git commit: [SPARK-7933] Remove Patrick's username/pw from merge script
Repository: spark Updated Branches: refs/heads/branch-1.4 3b38c06f0 -> 3479e6a12 [SPARK-7933] Remove Patrick's username/pw from merge script Looks like this was added by accident when pwendell merged a commit back in September: fe2b1d6a209db9fe96b1c6630677955b94bd48c9 Author: Kay Ousterhout kayousterh...@gmail.com Closes #6485 from kayousterhout/SPARK-7933 and squashes the following commits: 7c6164a [Kay Ousterhout] [SPARK-7933] Remove Patrick's username/pw from merge script (cherry picked from commit 66c49ed60dcef48a6b38ae2d2c4c479933f3aa19) Signed-off-by: Patrick Wendell patr...@databricks.com Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3479e6a1 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3479e6a1 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3479e6a1 Branch: refs/heads/branch-1.4 Commit: 3479e6a127d0b93ef38533fdad02a49850716583 Parents: 3b38c06 Author: Kay Ousterhout kayousterh...@gmail.com Authored: Thu May 28 19:04:32 2015 -0700 Committer: Patrick Wendell patr...@databricks.com Committed: Thu May 28 19:04:51 2015 -0700 -- dev/merge_spark_pr.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/3479e6a1/dev/merge_spark_pr.py -- diff --git a/dev/merge_spark_pr.py b/dev/merge_spark_pr.py index b69cd15..f583fda 100755 --- a/dev/merge_spark_pr.py +++ b/dev/merge_spark_pr.py @@ -44,9 +44,9 @@ PR_REMOTE_NAME = os.environ.get("PR_REMOTE_NAME", "apache-github") # Remote name which points to Apache git PUSH_REMOTE_NAME = os.environ.get("PUSH_REMOTE_NAME", "apache") # ASF JIRA username -JIRA_USERNAME = os.environ.get("JIRA_USERNAME", "pwendell") +JIRA_USERNAME = os.environ.get("JIRA_USERNAME", "") # ASF JIRA password -JIRA_PASSWORD = os.environ.get("JIRA_PASSWORD", "35500") +JIRA_PASSWORD = os.environ.get("JIRA_PASSWORD", "") GITHUB_BASE = "https://github.com/apache/spark/pull" GITHUB_API_BASE = "https://api.github.com/repos/apache/spark"
spark git commit: [SPARK-7933] Remove Patrick's username/pw from merge script
Repository: spark Updated Branches: refs/heads/master ee6a0e12f -> 66c49ed60 [SPARK-7933] Remove Patrick's username/pw from merge script Looks like this was added by accident when pwendell merged a commit back in September: fe2b1d6a209db9fe96b1c6630677955b94bd48c9 Author: Kay Ousterhout kayousterh...@gmail.com Closes #6485 from kayousterhout/SPARK-7933 and squashes the following commits: 7c6164a [Kay Ousterhout] [SPARK-7933] Remove Patrick's username/pw from merge script Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/66c49ed6 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/66c49ed6 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/66c49ed6 Branch: refs/heads/master Commit: 66c49ed60dcef48a6b38ae2d2c4c479933f3aa19 Parents: ee6a0e1 Author: Kay Ousterhout kayousterh...@gmail.com Authored: Thu May 28 19:04:32 2015 -0700 Committer: Patrick Wendell patr...@databricks.com Committed: Thu May 28 19:04:32 2015 -0700 -- dev/merge_spark_pr.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/66c49ed6/dev/merge_spark_pr.py -- diff --git a/dev/merge_spark_pr.py b/dev/merge_spark_pr.py index 787c5cc..cd83b35 100755 --- a/dev/merge_spark_pr.py +++ b/dev/merge_spark_pr.py @@ -44,9 +44,9 @@ PR_REMOTE_NAME = os.environ.get("PR_REMOTE_NAME", "apache-github") # Remote name which points to Apache git PUSH_REMOTE_NAME = os.environ.get("PUSH_REMOTE_NAME", "apache") # ASF JIRA username -JIRA_USERNAME = os.environ.get("JIRA_USERNAME", "pwendell") +JIRA_USERNAME = os.environ.get("JIRA_USERNAME", "") # ASF JIRA password -JIRA_PASSWORD = os.environ.get("JIRA_PASSWORD", "35500") +JIRA_PASSWORD = os.environ.get("JIRA_PASSWORD", "") GITHUB_BASE = "https://github.com/apache/spark/pull" GITHUB_API_BASE = "https://api.github.com/repos/apache/spark"
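The fix replaces hard-coded credentials with empty, environment-backed defaults. The same read-from-the-environment pattern sketched in Scala for illustration (variable names follow the script; the snippet itself is hypothetical):

    // Prefer the environment over literals checked into version control.
    val jiraUsername = sys.env.getOrElse("JIRA_USERNAME", "")
    val jiraPassword = sys.env.getOrElse("JIRA_PASSWORD", "")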
spark git commit: [SPARK-7927] whitespace fixes for streaming.
Repository: spark Updated Branches: refs/heads/master 1bd63e82f -> 3af0b3136 [SPARK-7927] whitespace fixes for streaming. So we can enable a whitespace enforcement rule in the style checker to save code review time. Author: Reynold Xin r...@databricks.com Closes #6475 from rxin/whitespace-streaming and squashes the following commits: 810dae4 [Reynold Xin] Fixed tests. 89068ad [Reynold Xin] [SPARK-7927] whitespace fixes for streaming. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3af0b313 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3af0b313 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3af0b313 Branch: refs/heads/master Commit: 3af0b3136e4b7dea52c413d640653ccddc638574 Parents: 1bd63e8 Author: Reynold Xin r...@databricks.com Authored: Thu May 28 17:55:22 2015 -0700 Committer: Reynold Xin r...@databricks.com Committed: Thu May 28 17:55:22 2015 -0700 -- .../scala/org/apache/spark/streaming/StreamingContext.scala | 2 +- .../apache/spark/streaming/api/java/JavaPairDStream.scala| 8 .../scala/org/apache/spark/streaming/dstream/DStream.scala | 2 +- .../apache/spark/streaming/dstream/FileInputDStream.scala| 8 .../spark/streaming/dstream/PairDStreamFunctions.scala | 2 +- .../spark/streaming/dstream/ReducedWindowedDStream.scala | 8 .../org/apache/spark/streaming/dstream/ShuffledDStream.scala | 6 +++--- .../org/apache/spark/streaming/dstream/StateDStream.scala| 2 +- .../org/apache/spark/streaming/dstream/WindowedDStream.scala | 4 ++-- .../org/apache/spark/streaming/receiver/BlockGenerator.scala | 2 +- .../org/apache/spark/streaming/receiver/RateLimiter.scala| 3 ++- .../apache/spark/streaming/scheduler/ReceiverTracker.scala | 2 +- .../apache/spark/streaming/util/FileBasedWriteAheadLog.scala | 2 +- .../org/apache/spark/streaming/util/RawTextHelper.scala | 4 ++-- .../org/apache/spark/streaming/BasicOperationsSuite.scala| 6 +++--- .../scala/org/apache/spark/streaming/InputStreamsSuite.scala | 2 +- .../org/apache/spark/streaming/StreamingContextSuite.scala | 4 +++- .../org/apache/spark/streaming/StreamingListenerSuite.scala | 6 -- .../streaming/ui/StreamingJobProgressListenerSuite.scala | 2 +- 19 files changed, 40 insertions(+), 35 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/3af0b313/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala -- diff --git a/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala b/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala index 5e58ed7..25842d5 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala @@ -461,7 +461,7 @@ class StreamingContext private[streaming] ( val conf = sc_.hadoopConfiguration conf.setInt(FixedLengthBinaryInputFormat.RECORD_LENGTH_PROPERTY, recordLength) val br = fileStream[LongWritable, BytesWritable, FixedLengthBinaryInputFormat]( - directory, FileInputDStream.defaultFilter : Path => Boolean, newFilesOnly=true, conf) + directory, FileInputDStream.defaultFilter: Path => Boolean, newFilesOnly = true, conf) val data = br.map { case (k, v) => val bytes = v.getBytes require(bytes.length == recordLength, "Byte array does not have correct length. 
+ http://git-wip-us.apache.org/repos/asf/spark/blob/3af0b313/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaPairDStream.scala -- diff --git a/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaPairDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaPairDStream.scala index 93baad1..959ac9c 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaPairDStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaPairDStream.scala @@ -227,7 +227,7 @@ class JavaPairDStream[K, V](val dstream: DStream[(K, V)])( * @param numPartitions Number of partitions of each RDD in the new DStream. */ def groupByKeyAndWindow(windowDuration: Duration, slideDuration: Duration, numPartitions: Int) - :JavaPairDStream[K, JIterable[V]] = { +: JavaPairDStream[K, JIterable[V]] = { dstream.groupByKeyAndWindow(windowDuration, slideDuration, numPartitions) .mapValues(asJavaIterable _) } @@ -247,7 +247,7 @@ class JavaPairDStream[K, V](val dstream: DStream[(K, V)])( windowDuration: Duration, slideDuration: Duration, partitioner: Partitioner -):JavaPairDStream[K, JIterable[V]] = { +
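For reference, the groupByKeyAndWindow overloads touched above group a pair DStream's values per key over a sliding window. A minimal sketch, assuming a hypothetical DStream of (word, count) pairs named pairs:

    import org.apache.spark.streaming.Seconds

    // Collect each key's values over the last 30 seconds, recomputed every 10.
    val grouped = pairs.groupByKeyAndWindow(Seconds(30), Seconds(10))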
spark git commit: [SPARK-7927] whitespace fixes for streaming.
Repository: spark Updated Branches: refs/heads/branch-1.4 7bb445a38 -> f4b135337 [SPARK-7927] whitespace fixes for streaming. So we can enable a whitespace enforcement rule in the style checker to save code review time. Author: Reynold Xin r...@databricks.com Closes #6475 from rxin/whitespace-streaming and squashes the following commits: 810dae4 [Reynold Xin] Fixed tests. 89068ad [Reynold Xin] [SPARK-7927] whitespace fixes for streaming. (cherry picked from commit 3af0b3136e4b7dea52c413d640653ccddc638574) Signed-off-by: Reynold Xin r...@databricks.com Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f4b13533 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f4b13533 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f4b13533 Branch: refs/heads/branch-1.4 Commit: f4b135337c5032dcd224ebd14e134aa8de0c1667 Parents: 7bb445a Author: Reynold Xin r...@databricks.com Authored: Thu May 28 17:55:22 2015 -0700 Committer: Reynold Xin r...@databricks.com Committed: Thu May 28 17:55:29 2015 -0700 -- .../scala/org/apache/spark/streaming/StreamingContext.scala | 2 +- .../apache/spark/streaming/api/java/JavaPairDStream.scala| 8 .../scala/org/apache/spark/streaming/dstream/DStream.scala | 2 +- .../apache/spark/streaming/dstream/FileInputDStream.scala| 8 .../spark/streaming/dstream/PairDStreamFunctions.scala | 2 +- .../spark/streaming/dstream/ReducedWindowedDStream.scala | 8 .../org/apache/spark/streaming/dstream/ShuffledDStream.scala | 6 +++--- .../org/apache/spark/streaming/dstream/StateDStream.scala| 2 +- .../org/apache/spark/streaming/dstream/WindowedDStream.scala | 4 ++-- .../org/apache/spark/streaming/receiver/BlockGenerator.scala | 2 +- .../org/apache/spark/streaming/receiver/RateLimiter.scala| 3 ++- .../apache/spark/streaming/scheduler/ReceiverTracker.scala | 2 +- .../apache/spark/streaming/util/FileBasedWriteAheadLog.scala | 2 +- .../org/apache/spark/streaming/util/RawTextHelper.scala | 4 ++-- .../org/apache/spark/streaming/BasicOperationsSuite.scala| 6 +++--- .../scala/org/apache/spark/streaming/InputStreamsSuite.scala | 2 +- .../org/apache/spark/streaming/StreamingContextSuite.scala | 4 +++- .../org/apache/spark/streaming/StreamingListenerSuite.scala | 6 -- .../streaming/ui/StreamingJobProgressListenerSuite.scala | 2 +- 19 files changed, 40 insertions(+), 35 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/f4b13533/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala -- diff --git a/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala b/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala index 5e58ed7..25842d5 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala @@ -461,7 +461,7 @@ class StreamingContext private[streaming] ( val conf = sc_.hadoopConfiguration conf.setInt(FixedLengthBinaryInputFormat.RECORD_LENGTH_PROPERTY, recordLength) val br = fileStream[LongWritable, BytesWritable, FixedLengthBinaryInputFormat]( - directory, FileInputDStream.defaultFilter : Path => Boolean, newFilesOnly=true, conf) + directory, FileInputDStream.defaultFilter: Path => Boolean, newFilesOnly = true, conf) val data = br.map { case (k, v) => val bytes = v.getBytes require(bytes.length == recordLength, "Byte array does not have correct length. 
+ http://git-wip-us.apache.org/repos/asf/spark/blob/f4b13533/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaPairDStream.scala -- diff --git a/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaPairDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaPairDStream.scala index 93baad1..959ac9c 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaPairDStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaPairDStream.scala @@ -227,7 +227,7 @@ class JavaPairDStream[K, V](val dstream: DStream[(K, V)])( * @param numPartitions Number of partitions of each RDD in the new DStream. */ def groupByKeyAndWindow(windowDuration: Duration, slideDuration: Duration, numPartitions: Int) - :JavaPairDStream[K, JIterable[V]] = { +: JavaPairDStream[K, JIterable[V]] = { dstream.groupByKeyAndWindow(windowDuration, slideDuration, numPartitions) .mapValues(asJavaIterable _) } @@ -247,7 +247,7 @@ class JavaPairDStream[K, V](val dstream: DStream[(K, V)])(
spark git commit: [SPARK-7927] whitespace fixes for GraphX.
Repository: spark Updated Branches: refs/heads/branch-1.4 e3dd2802f -> b3a590061 [SPARK-7927] whitespace fixes for GraphX. So we can enable a whitespace enforcement rule in the style checker to save code review time. Author: Reynold Xin r...@databricks.com Closes #6474 from rxin/whitespace-graphx and squashes the following commits: 4d3cd26 [Reynold Xin] Fixed tests. 869dde4 [Reynold Xin] [SPARK-7927] whitespace fixes for GraphX. (cherry picked from commit b069ad23d9b6cbfb3a8bf245547add4816669075) Signed-off-by: Reynold Xin r...@databricks.com Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b3a59006 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b3a59006 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b3a59006 Branch: refs/heads/branch-1.4 Commit: b3a590061da09674cb0ff868c808985ea846145e Parents: e3dd280 Author: Reynold Xin r...@databricks.com Authored: Thu May 28 20:17:16 2015 -0700 Committer: Reynold Xin r...@databricks.com Committed: Thu May 28 20:17:28 2015 -0700 -- .../org/apache/spark/graphx/EdgeDirection.scala | 4 ++-- .../scala/org/apache/spark/graphx/EdgeTriplet.scala | 2 +- .../main/scala/org/apache/spark/graphx/Graph.scala | 2 +- .../scala/org/apache/spark/graphx/GraphOps.scala | 10 +- .../main/scala/org/apache/spark/graphx/Pregel.scala | 8 .../org/apache/spark/graphx/impl/EdgePartition.scala | 4 ++-- .../scala/org/apache/spark/graphx/lib/PageRank.scala | 8 .../org/apache/spark/graphx/lib/SVDPlusPlus.scala| 2 +- .../org/apache/spark/graphx/lib/TriangleCount.scala | 4 ++-- .../apache/spark/graphx/util/GraphGenerators.scala | 9 + .../org/apache/spark/graphx/GraphOpsSuite.scala | 6 +++--- .../scala/org/apache/spark/graphx/GraphSuite.scala | 6 +++--- .../spark/graphx/lib/ConnectedComponentsSuite.scala | 15 +-- .../org/apache/spark/graphx/lib/PageRankSuite.scala | 14 +++--- .../apache/spark/graphx/lib/TriangleCountSuite.scala | 2 +- 15 files changed, 50 insertions(+), 46 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/b3a59006/graphx/src/main/scala/org/apache/spark/graphx/EdgeDirection.scala -- diff --git a/graphx/src/main/scala/org/apache/spark/graphx/EdgeDirection.scala b/graphx/src/main/scala/org/apache/spark/graphx/EdgeDirection.scala index 058c8c8..ce1054e 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/EdgeDirection.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/EdgeDirection.scala @@ -26,8 +26,8 @@ class EdgeDirection private (private val name: String) extends Serializable { * out becomes in and both and either remain the same. */ def reverse: EdgeDirection = this match { -case EdgeDirection.In   => EdgeDirection.Out -case EdgeDirection.Out  => EdgeDirection.In +case EdgeDirection.In => EdgeDirection.Out +case EdgeDirection.Out => EdgeDirection.In case EdgeDirection.Either => EdgeDirection.Either case EdgeDirection.Both => EdgeDirection.Both } http://git-wip-us.apache.org/repos/asf/spark/blob/b3a59006/graphx/src/main/scala/org/apache/spark/graphx/EdgeTriplet.scala -- diff --git a/graphx/src/main/scala/org/apache/spark/graphx/EdgeTriplet.scala b/graphx/src/main/scala/org/apache/spark/graphx/EdgeTriplet.scala index c8790ca..65f8242 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/EdgeTriplet.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/EdgeTriplet.scala @@ -37,7 +37,7 @@ class EdgeTriplet[VD, ED] extends Edge[ED] { /** * Set the edge properties of this triplet. 
*/ - protected[spark] def set(other: Edge[ED]): EdgeTriplet[VD,ED] = { + protected[spark] def set(other: Edge[ED]): EdgeTriplet[VD, ED] = { srcId = other.srcId dstId = other.dstId attr = other.attr http://git-wip-us.apache.org/repos/asf/spark/blob/b3a59006/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala -- diff --git a/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala b/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala index 36dc7b0..db73a8a 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala @@ -316,7 +316,7 @@ abstract class Graph[VD: ClassTag, ED: ClassTag] protected () extends Serializab * satisfy the predicates */ def subgraph( - epred: EdgeTriplet[VD,ED] => Boolean = (x => true), + epred: EdgeTriplet[VD, ED] => Boolean = (x => true), vpred: (VertexId, VD) => Boolean = ((v, d) => true)) : Graph[VD, ED]
spark git commit: [SPARK-7927] whitespace fixes for GraphX.
Repository: spark Updated Branches: refs/heads/master 7f7505d8d -> b069ad23d [SPARK-7927] whitespace fixes for GraphX. So we can enable a whitespace enforcement rule in the style checker to save code review time. Author: Reynold Xin r...@databricks.com Closes #6474 from rxin/whitespace-graphx and squashes the following commits: 4d3cd26 [Reynold Xin] Fixed tests. 869dde4 [Reynold Xin] [SPARK-7927] whitespace fixes for GraphX. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b069ad23 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b069ad23 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b069ad23 Branch: refs/heads/master Commit: b069ad23d9b6cbfb3a8bf245547add4816669075 Parents: 7f7505d Author: Reynold Xin r...@databricks.com Authored: Thu May 28 20:17:16 2015 -0700 Committer: Reynold Xin r...@databricks.com Committed: Thu May 28 20:17:16 2015 -0700 -- .../org/apache/spark/graphx/EdgeDirection.scala | 4 ++-- .../scala/org/apache/spark/graphx/EdgeTriplet.scala | 2 +- .../main/scala/org/apache/spark/graphx/Graph.scala | 2 +- .../scala/org/apache/spark/graphx/GraphOps.scala | 10 +- .../main/scala/org/apache/spark/graphx/Pregel.scala | 8 .../org/apache/spark/graphx/impl/EdgePartition.scala | 4 ++-- .../scala/org/apache/spark/graphx/lib/PageRank.scala | 8 .../org/apache/spark/graphx/lib/SVDPlusPlus.scala| 2 +- .../org/apache/spark/graphx/lib/TriangleCount.scala | 4 ++-- .../apache/spark/graphx/util/GraphGenerators.scala | 9 + .../org/apache/spark/graphx/GraphOpsSuite.scala | 6 +++--- .../scala/org/apache/spark/graphx/GraphSuite.scala | 6 +++--- .../spark/graphx/lib/ConnectedComponentsSuite.scala | 15 +-- .../org/apache/spark/graphx/lib/PageRankSuite.scala | 14 +++--- .../apache/spark/graphx/lib/TriangleCountSuite.scala | 2 +- 15 files changed, 50 insertions(+), 46 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/b069ad23/graphx/src/main/scala/org/apache/spark/graphx/EdgeDirection.scala -- diff --git a/graphx/src/main/scala/org/apache/spark/graphx/EdgeDirection.scala b/graphx/src/main/scala/org/apache/spark/graphx/EdgeDirection.scala index 058c8c8..ce1054e 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/EdgeDirection.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/EdgeDirection.scala @@ -26,8 +26,8 @@ class EdgeDirection private (private val name: String) extends Serializable { * out becomes in and both and either remain the same. */ def reverse: EdgeDirection = this match { -case EdgeDirection.In   => EdgeDirection.Out -case EdgeDirection.Out  => EdgeDirection.In +case EdgeDirection.In => EdgeDirection.Out +case EdgeDirection.Out => EdgeDirection.In case EdgeDirection.Either => EdgeDirection.Either case EdgeDirection.Both => EdgeDirection.Both } http://git-wip-us.apache.org/repos/asf/spark/blob/b069ad23/graphx/src/main/scala/org/apache/spark/graphx/EdgeTriplet.scala -- diff --git a/graphx/src/main/scala/org/apache/spark/graphx/EdgeTriplet.scala b/graphx/src/main/scala/org/apache/spark/graphx/EdgeTriplet.scala index c8790ca..65f8242 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/EdgeTriplet.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/EdgeTriplet.scala @@ -37,7 +37,7 @@ class EdgeTriplet[VD, ED] extends Edge[ED] { /** * Set the edge properties of this triplet. 
*/ - protected[spark] def set(other: Edge[ED]): EdgeTriplet[VD,ED] = { + protected[spark] def set(other: Edge[ED]): EdgeTriplet[VD, ED] = { srcId = other.srcId dstId = other.dstId attr = other.attr http://git-wip-us.apache.org/repos/asf/spark/blob/b069ad23/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala -- diff --git a/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala b/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala index 36dc7b0..db73a8a 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala @@ -316,7 +316,7 @@ abstract class Graph[VD: ClassTag, ED: ClassTag] protected () extends Serializab * satisfy the predicates */ def subgraph( - epred: EdgeTriplet[VD,ED] => Boolean = (x => true), + epred: EdgeTriplet[VD, ED] => Boolean = (x => true), vpred: (VertexId, VD) => Boolean = ((v, d) => true)) : Graph[VD, ED] http://git-wip-us.apache.org/repos/asf/spark/blob/b069ad23/graphx/src/main/scala/org/apache/spark/graphx/GraphOps.scala
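As context for the subgraph signature above: both predicates default to keep-everything, so callers override only the filter they need. A minimal sketch, assuming a hypothetical graph with Double edge weights and String vertex attributes:

    // Keep positively weighted edges between named vertices.
    val sub = graph.subgraph(
      epred = t => t.attr > 0.0,
      vpred = (id, name) => name != null)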
[1/2] spark git commit: [SPARK-7927] whitespace fixes for core.
Repository: spark Updated Branches: refs/heads/branch-1.4 22e42e3fe -> e3dd2802f http://git-wip-us.apache.org/repos/asf/spark/blob/e3dd2802/core/src/test/scala/org/apache/spark/deploy/worker/WorkerArgumentsTest.scala -- diff --git a/core/src/test/scala/org/apache/spark/deploy/worker/WorkerArgumentsTest.scala b/core/src/test/scala/org/apache/spark/deploy/worker/WorkerArgumentsTest.scala index 7cc2104..e432b8e 100644 --- a/core/src/test/scala/org/apache/spark/deploy/worker/WorkerArgumentsTest.scala +++ b/core/src/test/scala/org/apache/spark/deploy/worker/WorkerArgumentsTest.scala @@ -66,7 +66,7 @@ class WorkerArgumentsTest extends FunSuite { } } val conf = new MySparkConf() -val workerArgs = new WorkerArguments(args, conf) +val workerArgs = new WorkerArguments(args, conf) assert(workerArgs.memory === 5120) } http://git-wip-us.apache.org/repos/asf/spark/blob/e3dd2802/core/src/test/scala/org/apache/spark/deploy/worker/WorkerSuite.scala -- diff --git a/core/src/test/scala/org/apache/spark/deploy/worker/WorkerSuite.scala b/core/src/test/scala/org/apache/spark/deploy/worker/WorkerSuite.scala index 450fba2..93a779d 100644 --- a/core/src/test/scala/org/apache/spark/deploy/worker/WorkerSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/worker/WorkerSuite.scala @@ -25,7 +25,7 @@ import org.scalatest.{Matchers, FunSuite} class WorkerSuite extends FunSuite with Matchers { def cmd(javaOpts: String*): Command = { -Command("", Seq.empty, Map.empty, Seq.empty, Seq.empty, Seq(javaOpts:_*)) +Command("", Seq.empty, Map.empty, Seq.empty, Seq.empty, Seq(javaOpts : _*)) } def conf(opts: (String, String)*): SparkConf = new SparkConf(loadDefaults = false).setAll(opts) http://git-wip-us.apache.org/repos/asf/spark/blob/e3dd2802/core/src/test/scala/org/apache/spark/metrics/InputOutputMetricsSuite.scala -- diff --git a/core/src/test/scala/org/apache/spark/metrics/InputOutputMetricsSuite.scala b/core/src/test/scala/org/apache/spark/metrics/InputOutputMetricsSuite.scala index ef3e213..60dba3b 100644 --- a/core/src/test/scala/org/apache/spark/metrics/InputOutputMetricsSuite.scala +++ b/core/src/test/scala/org/apache/spark/metrics/InputOutputMetricsSuite.scala @@ -263,7 +263,7 @@ class InputOutputMetricsSuite extends FunSuite with SharedSparkContext val tmpRdd = sc.textFile(tmpFilePath, numPartitions) -val firstSize= runAndReturnBytesRead { +val firstSize = runAndReturnBytesRead { aRdd.count() } val secondSize = runAndReturnBytesRead { @@ -433,10 +433,10 @@ class OldCombineTextRecordReaderWrapper( /** * Hadoop 2 has a version of this, but we can't use it for backwards compatibility */ -class NewCombineTextInputFormat extends NewCombineFileInputFormat[LongWritable,Text] { +class NewCombineTextInputFormat extends NewCombineFileInputFormat[LongWritable, Text] { def createRecordReader(split: NewInputSplit, context: TaskAttemptContext) : NewRecordReader[LongWritable, Text] = { -new NewCombineFileRecordReader[LongWritable,Text](split.asInstanceOf[NewCombineFileSplit], +new NewCombineFileRecordReader[LongWritable, Text](split.asInstanceOf[NewCombineFileSplit], context, classOf[NewCombineTextRecordReaderWrapper]) } } http://git-wip-us.apache.org/repos/asf/spark/blob/e3dd2802/core/src/test/scala/org/apache/spark/rdd/PairRDDFunctionsSuite.scala -- diff --git a/core/src/test/scala/org/apache/spark/rdd/PairRDDFunctionsSuite.scala b/core/src/test/scala/org/apache/spark/rdd/PairRDDFunctionsSuite.scala index ca0d953..6564232 100644 --- a/core/src/test/scala/org/apache/spark/rdd/PairRDDFunctionsSuite.scala +++ 
b/core/src/test/scala/org/apache/spark/rdd/PairRDDFunctionsSuite.scala @@ -512,17 +512,17 @@ class PairRDDFunctionsSuite extends FunSuite with SharedSparkContext { } test("lookup") { -val pairs = sc.parallelize(Array((1,2), (3,4), (5,6), (5,7))) +val pairs = sc.parallelize(Array((1, 2), (3, 4), (5, 6), (5, 7))) assert(pairs.partitioner === None) assert(pairs.lookup(1) === Seq(2)) -assert(pairs.lookup(5) === Seq(6,7)) +assert(pairs.lookup(5) === Seq(6, 7)) assert(pairs.lookup(-1) === Seq()) } test("lookup with partitioner") { -val pairs = sc.parallelize(Array((1,2), (3,4), (5,6), (5,7))) +val pairs = sc.parallelize(Array((1, 2), (3, 4), (5, 6), (5, 7))) val p = new Partitioner { def numPartitions: Int = 2 @@ -533,12 +533,12 @@ class PairRDDFunctionsSuite extends FunSuite with SharedSparkContext { assert(shuffled.partitioner === Some(p)) assert(shuffled.lookup(1) === Seq(2)) -assert(shuffled.lookup(5) === Seq(6,7)) +assert(shuffled.lookup(5)
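The lookup semantics these tests pin down: every value bound to the key comes back in one Seq, and a missing key yields an empty Seq. A minimal sketch, assuming an active SparkContext sc:

    val pairs = sc.parallelize(Seq((1, 2), (3, 4), (5, 6), (5, 7)))
    pairs.lookup(5)   // Seq(6, 7): all values for key 5
    pairs.lookup(-1)  // Seq(): absent keys return empty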
[2/2] spark git commit: [SPARK-7927] whitespace fixes for core.
[SPARK-7927] whitespace fixes for core. So we can enable a whitespace enforcement rule in the style checker to save code review time. Author: Reynold Xin r...@databricks.com Closes #6473 from rxin/whitespace-core and squashes the following commits: 058195d [Reynold Xin] Fixed tests. fce11e9 [Reynold Xin] [SPARK-7927] whitespace fixes for core. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7f7505d8 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7f7505d8 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7f7505d8 Branch: refs/heads/master Commit: 7f7505d8db7759ea46e904f767c23130eff1104a Parents: 8da560d Author: Reynold Xin r...@databricks.com Authored: Thu May 28 20:15:52 2015 -0700 Committer: Reynold Xin r...@databricks.com Committed: Thu May 28 20:15:52 2015 -0700 -- .../scala/org/apache/spark/Accumulators.scala | 2 +- .../scala/org/apache/spark/Aggregator.scala | 4 +-- .../scala/org/apache/spark/Partitioner.scala| 8 ++--- .../main/scala/org/apache/spark/SparkConf.scala | 2 +- .../scala/org/apache/spark/SparkContext.scala | 10 +++--- .../main/scala/org/apache/spark/SparkEnv.scala | 4 +-- .../org/apache/spark/SparkHadoopWriter.scala| 10 +++--- .../org/apache/spark/api/java/JavaRDDLike.scala | 2 +- .../org/apache/spark/api/python/PythonRDD.scala | 2 +- .../apache/spark/api/r/RBackendHandler.scala| 4 +-- .../scala/org/apache/spark/api/r/RRDD.scala | 2 +- .../apache/spark/broadcast/HttpBroadcast.scala | 4 +-- .../spark/deploy/FaultToleranceTest.scala | 2 +- .../org/apache/spark/deploy/SparkSubmit.scala | 2 +- .../org/apache/spark/deploy/worker/Worker.scala | 2 +- .../org/apache/spark/executor/TaskMetrics.scala | 2 +- .../mapreduce/SparkHadoopMapReduceUtil.scala| 2 +- .../apache/spark/network/nio/BlockMessage.scala | 2 +- .../spark/network/nio/BlockMessageArray.scala | 4 +-- .../spark/network/nio/SecurityMessage.scala | 2 +- .../spark/partial/GroupedCountEvaluator.scala | 6 ++-- .../org/apache/spark/rdd/CheckpointRDD.scala| 2 +- .../org/apache/spark/rdd/CoalescedRDD.scala | 4 +-- .../org/apache/spark/rdd/PairRDDFunctions.scala | 6 ++-- .../main/scala/org/apache/spark/rdd/RDD.scala | 12 +++ .../spark/rdd/SequenceFileRDDFunctions.scala| 6 ++-- .../org/apache/spark/rdd/SubtractedRDD.scala| 2 +- .../apache/spark/rdd/ZippedPartitionsRDD.scala | 2 +- .../apache/spark/scheduler/DAGScheduler.scala | 4 +-- .../spark/scheduler/DAGSchedulerSource.scala| 3 +- .../spark/scheduler/SchedulingAlgorithm.scala | 2 +- .../apache/spark/scheduler/SparkListener.scala | 6 ++-- .../apache/spark/scheduler/TaskSetManager.scala | 4 +-- .../cluster/CoarseGrainedClusterMessage.scala | 3 +- .../cluster/YarnSchedulerBackend.scala | 2 +- .../mesos/CoarseMesosSchedulerBackend.scala | 2 +- .../cluster/mesos/MesosSchedulerBackend.scala | 2 +- .../mesos/MesosSchedulerBackendUtil.scala | 2 +- .../spark/status/api/v1/AllStagesResource.scala | 4 +-- .../spark/status/api/v1/ApiRootResource.scala | 8 ++--- .../spark/status/api/v1/OneRDDResource.scala| 2 +- .../org/apache/spark/status/api/v1/api.scala| 2 +- .../storage/BlockManagerSlaveEndpoint.scala | 2 +- .../spark/storage/BlockManagerSource.scala | 3 +- .../scala/org/apache/spark/ui/SparkUI.scala | 2 +- .../scala/org/apache/spark/ui/UIUtils.scala | 2 +- .../apache/spark/ui/UIWorkloadGenerator.scala | 2 +- .../org/apache/spark/ui/storage/RDDPage.scala | 2 +- .../scala/org/apache/spark/util/AkkaUtils.scala | 2 +- .../apache/spark/util/CompletionIterator.scala | 2 +- 
.../org/apache/spark/util/Distribution.scala| 4 +-- .../org/apache/spark/util/MetadataCleaner.scala | 2 +- .../org/apache/spark/util/MutablePair.scala | 2 +- .../org/apache/spark/util/SizeEstimator.scala | 16 - .../scala/org/apache/spark/util/Utils.scala | 2 +- .../apache/spark/util/collection/BitSet.scala | 2 +- .../spark/util/collection/SortDataFormat.scala | 4 +-- .../util/random/StratifiedSamplingUtils.scala | 2 +- .../org/apache/spark/AccumulatorSuite.scala | 2 +- .../org/apache/spark/CheckpointSuite.scala | 4 +-- .../org/apache/spark/ContextCleanerSuite.scala | 6 ++-- .../scala/org/apache/spark/FailureSuite.scala | 2 +- .../org/apache/spark/FileServerSuite.scala | 20 ++-- .../test/scala/org/apache/spark/FileSuite.scala | 2 +- .../apache/spark/ImplicitOrderingSuite.scala| 4 +-- .../scala/org/apache/spark/SparkConfSuite.scala | 12 +++ .../org/apache/spark/SparkContextSuite.scala| 14 .../apache/spark/broadcast/BroadcastSuite.scala | 2 +- .../deploy/worker/WorkerArgumentsTest.scala | 2 +- .../spark/deploy/worker/WorkerSuite.scala | 2
[2/2] spark git commit: [SPARK-7927] whitespace fixes for core.
[SPARK-7927] whitespace fixes for core. So we can enable a whitespace enforcement rule in the style checker to save code review time. Author: Reynold Xin r...@databricks.com Closes #6473 from rxin/whitespace-core and squashes the following commits: 058195d [Reynold Xin] Fixed tests. fce11e9 [Reynold Xin] [SPARK-7927] whitespace fixes for core. (cherry picked from commit 7f7505d8db7759ea46e904f767c23130eff1104a) Signed-off-by: Reynold Xin r...@databricks.com Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e3dd2802 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e3dd2802 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e3dd2802 Branch: refs/heads/branch-1.4 Commit: e3dd2802f6dd8b2df9fb73d8e9901c4e6e4d6b84 Parents: 22e42e3 Author: Reynold Xin r...@databricks.com Authored: Thu May 28 20:15:52 2015 -0700 Committer: Reynold Xin r...@databricks.com Committed: Thu May 28 20:16:35 2015 -0700 -- .../scala/org/apache/spark/Accumulators.scala | 2 +- .../scala/org/apache/spark/Aggregator.scala | 4 +-- .../scala/org/apache/spark/Partitioner.scala| 8 ++--- .../main/scala/org/apache/spark/SparkConf.scala | 2 +- .../scala/org/apache/spark/SparkContext.scala | 10 +++--- .../main/scala/org/apache/spark/SparkEnv.scala | 4 +-- .../org/apache/spark/SparkHadoopWriter.scala| 10 +++--- .../org/apache/spark/api/java/JavaRDDLike.scala | 2 +- .../org/apache/spark/api/python/PythonRDD.scala | 2 +- .../apache/spark/api/r/RBackendHandler.scala| 4 +-- .../scala/org/apache/spark/api/r/RRDD.scala | 2 +- .../apache/spark/broadcast/HttpBroadcast.scala | 4 +-- .../spark/deploy/FaultToleranceTest.scala | 2 +- .../org/apache/spark/deploy/SparkSubmit.scala | 2 +- .../org/apache/spark/deploy/worker/Worker.scala | 2 +- .../org/apache/spark/executor/TaskMetrics.scala | 2 +- .../mapreduce/SparkHadoopMapReduceUtil.scala| 2 +- .../apache/spark/network/nio/BlockMessage.scala | 2 +- .../spark/network/nio/BlockMessageArray.scala | 4 +-- .../spark/network/nio/SecurityMessage.scala | 2 +- .../spark/partial/GroupedCountEvaluator.scala | 6 ++-- .../org/apache/spark/rdd/CheckpointRDD.scala| 2 +- .../org/apache/spark/rdd/CoalescedRDD.scala | 4 +-- .../org/apache/spark/rdd/PairRDDFunctions.scala | 6 ++-- .../main/scala/org/apache/spark/rdd/RDD.scala | 12 +++ .../spark/rdd/SequenceFileRDDFunctions.scala| 6 ++-- .../org/apache/spark/rdd/SubtractedRDD.scala| 2 +- .../apache/spark/rdd/ZippedPartitionsRDD.scala | 2 +- .../apache/spark/scheduler/DAGScheduler.scala | 4 +-- .../spark/scheduler/DAGSchedulerSource.scala| 3 +- .../spark/scheduler/SchedulingAlgorithm.scala | 2 +- .../apache/spark/scheduler/SparkListener.scala | 6 ++-- .../apache/spark/scheduler/TaskSetManager.scala | 4 +-- .../cluster/CoarseGrainedClusterMessage.scala | 3 +- .../cluster/YarnSchedulerBackend.scala | 2 +- .../mesos/CoarseMesosSchedulerBackend.scala | 2 +- .../cluster/mesos/MesosSchedulerBackend.scala | 2 +- .../mesos/MesosSchedulerBackendUtil.scala | 2 +- .../spark/status/api/v1/AllStagesResource.scala | 4 +-- .../spark/status/api/v1/ApiRootResource.scala | 8 ++--- .../spark/status/api/v1/OneRDDResource.scala| 2 +- .../org/apache/spark/status/api/v1/api.scala| 2 +- .../storage/BlockManagerSlaveEndpoint.scala | 2 +- .../spark/storage/BlockManagerSource.scala | 3 +- .../scala/org/apache/spark/ui/SparkUI.scala | 2 +- .../scala/org/apache/spark/ui/UIUtils.scala | 2 +- .../apache/spark/ui/UIWorkloadGenerator.scala | 2 +- .../org/apache/spark/ui/storage/RDDPage.scala | 2 +- 
.../scala/org/apache/spark/util/AkkaUtils.scala | 2 +- .../apache/spark/util/CompletionIterator.scala | 2 +- .../org/apache/spark/util/Distribution.scala| 4 +-- .../org/apache/spark/util/MetadataCleaner.scala | 2 +- .../org/apache/spark/util/MutablePair.scala | 2 +- .../org/apache/spark/util/SizeEstimator.scala | 16 - .../scala/org/apache/spark/util/Utils.scala | 2 +- .../apache/spark/util/collection/BitSet.scala | 2 +- .../spark/util/collection/SortDataFormat.scala | 4 +-- .../util/random/StratifiedSamplingUtils.scala | 2 +- .../org/apache/spark/AccumulatorSuite.scala | 2 +- .../org/apache/spark/CheckpointSuite.scala | 4 +-- .../org/apache/spark/ContextCleanerSuite.scala | 6 ++-- .../scala/org/apache/spark/FailureSuite.scala | 2 +- .../org/apache/spark/FileServerSuite.scala | 20 ++-- .../test/scala/org/apache/spark/FileSuite.scala | 2 +- .../apache/spark/ImplicitOrderingSuite.scala| 4 +-- .../scala/org/apache/spark/SparkConfSuite.scala | 12 +++ .../org/apache/spark/SparkContextSuite.scala| 14
spark git commit: [SPARK-7927] whitespace fixes for Catalyst module.
Repository: spark Updated Branches: refs/heads/branch-1.4 142ae52d4 -> 22e42e3fe [SPARK-7927] whitespace fixes for Catalyst module. So we can enable a whitespace enforcement rule in the style checker to save code review time. Author: Reynold Xin r...@databricks.com Closes #6476 from rxin/whitespace-catalyst and squashes the following commits: 650409d [Reynold Xin] Fixed tests. 51a9e5d [Reynold Xin] [SPARK-7927] whitespace fixes for Catalyst module. (cherry picked from commit 8da560d7de9b3c9a3e3ff197eeb10a3d7023f10d) Signed-off-by: Reynold Xin r...@databricks.com Conflicts: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/22e42e3f Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/22e42e3f Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/22e42e3f Branch: refs/heads/branch-1.4 Commit: 22e42e3fee21fc1adcb4a4fb515197be6e1a36b0 Parents: 142ae52 Author: Reynold Xin r...@databricks.com Authored: Thu May 28 20:11:57 2015 -0700 Committer: Reynold Xin r...@databricks.com Committed: Thu May 28 20:14:53 2015 -0700 -- .../sql/catalyst/AbstractSparkSQLParser.scala | 2 +- .../apache/spark/sql/catalyst/SqlParser.scala | 8 +- .../spark/sql/catalyst/analysis/Analyzer.scala | 9 +-- .../spark/sql/catalyst/analysis/Catalog.scala | 2 +- .../catalyst/analysis/HiveTypeCoercion.scala| 5 +- .../apache/spark/sql/catalyst/dsl/package.scala | 2 +- .../spark/sql/catalyst/errors/package.scala | 7 -- .../spark/sql/catalyst/expressions/Cast.scala | 84 ++-- .../sql/catalyst/expressions/ExtractValue.scala | 2 +- .../sql/catalyst/expressions/aggregates.scala | 4 +- .../expressions/codegen/CodeGenerator.scala | 4 +- .../codegen/GenerateProjection.scala| 2 +- .../sql/catalyst/expressions/generators.scala | 4 +- .../catalyst/expressions/stringOperations.scala | 4 +- .../expressions/windowExpressions.scala | 4 +- .../spark/sql/catalyst/plans/QueryPlan.scala| 4 +- .../catalyst/plans/logical/basicOperators.scala | 2 +- .../spark/sql/catalyst/trees/TreeNode.scala | 4 +- .../spark/sql/catalyst/util/package.scala | 2 +- .../org/apache/spark/sql/types/DataType.scala | 2 +- .../sql/catalyst/ScalaReflectionSuite.scala | 2 +- .../spark/sql/catalyst/SqlParserSuite.scala | 4 +- .../sql/catalyst/analysis/AnalysisSuite.scala | 2 +- .../expressions/ExpressionEvaluationSuite.scala | 56 ++--- .../expressions/GeneratedEvaluationSuite.scala | 6 +- .../GeneratedMutableEvaluationSuite.scala | 2 +- .../optimizer/BooleanSimplificationSuite.scala | 4 +- .../optimizer/FilterPushdownSuite.scala | 2 +- .../catalyst/optimizer/OptimizeInSuite.scala| 6 +- .../catalyst/optimizer/UnionPushdownSuite.scala | 4 +- .../sql/catalyst/trees/TreeNodeSuite.scala | 4 +- .../apache/spark/sql/types/DataTypeSuite.scala | 4 +- 32 files changed, 121 insertions(+), 132 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/22e42e3f/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/AbstractSparkSQLParser.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/AbstractSparkSQLParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/AbstractSparkSQLParser.scala index 2eb3e16..ef7b3ad 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/AbstractSparkSQLParser.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/AbstractSparkSQLParser.scala @@ -103,7 +103,7 @@ class SqlLexical extends StdLexical { ( identChar ~ 
(identChar | digit).* ^^ { case first ~ rest => processIdent((first :: rest).mkString) } | rep1(digit) ~ ('.' ~ digit.*).? ^^ { -case i ~ None    => NumericLit(i.mkString) +case i ~ None => NumericLit(i.mkString) case i ~ Some(d) => FloatLit(i.mkString + "." + d.mkString) } | '\'' ~ chrExcept('\'', '\n', EofCh).* ~ '\'' ^^ http://git-wip-us.apache.org/repos/asf/spark/blob/22e42e3f/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala index fc36b9f..e85312a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala @@ -140,7 +140,7 @@ class SqlParser extends AbstractSparkSQLParser with DataTypeParser {
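The lexer rule above is built from Scala's parser combinators: ~ sequences two parsers, .? makes the fractional part optional, and ^^ maps the parse result. A standalone sketch of the same shape (a hypothetical mini-lexer, not Catalyst's):

    import scala.util.parsing.combinator.RegexParsers

    object NumLexer extends RegexParsers {
      // digits, optionally followed by '.' and more digits
      def num: Parser[String] =
        """\d+""".r ~ ("." ~> """\d+""".r).? ^^ {
          case i ~ None    => s"int($i)"
          case i ~ Some(d) => s"float($i.$d)"
        }
    }

    // NumLexer.parse(NumLexer.num, "3.14") yields float(3.14)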
[1/2] spark git commit: [SPARK-7927] whitespace fixes for core.
Repository: spark Updated Branches: refs/heads/master 8da560d7d -> 7f7505d8d http://git-wip-us.apache.org/repos/asf/spark/blob/7f7505d8/core/src/test/scala/org/apache/spark/deploy/worker/WorkerArgumentsTest.scala -- diff --git a/core/src/test/scala/org/apache/spark/deploy/worker/WorkerArgumentsTest.scala b/core/src/test/scala/org/apache/spark/deploy/worker/WorkerArgumentsTest.scala index 7cc2104..e432b8e 100644 --- a/core/src/test/scala/org/apache/spark/deploy/worker/WorkerArgumentsTest.scala +++ b/core/src/test/scala/org/apache/spark/deploy/worker/WorkerArgumentsTest.scala @@ -66,7 +66,7 @@ class WorkerArgumentsTest extends FunSuite { } } val conf = new MySparkConf() -val workerArgs = new WorkerArguments(args, conf) +val workerArgs = new WorkerArguments(args, conf) assert(workerArgs.memory === 5120) } http://git-wip-us.apache.org/repos/asf/spark/blob/7f7505d8/core/src/test/scala/org/apache/spark/deploy/worker/WorkerSuite.scala -- diff --git a/core/src/test/scala/org/apache/spark/deploy/worker/WorkerSuite.scala b/core/src/test/scala/org/apache/spark/deploy/worker/WorkerSuite.scala index 450fba2..93a779d 100644 --- a/core/src/test/scala/org/apache/spark/deploy/worker/WorkerSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/worker/WorkerSuite.scala @@ -25,7 +25,7 @@ import org.scalatest.{Matchers, FunSuite} class WorkerSuite extends FunSuite with Matchers { def cmd(javaOpts: String*): Command = { -Command("", Seq.empty, Map.empty, Seq.empty, Seq.empty, Seq(javaOpts:_*)) +Command("", Seq.empty, Map.empty, Seq.empty, Seq.empty, Seq(javaOpts : _*)) } def conf(opts: (String, String)*): SparkConf = new SparkConf(loadDefaults = false).setAll(opts) http://git-wip-us.apache.org/repos/asf/spark/blob/7f7505d8/core/src/test/scala/org/apache/spark/metrics/InputOutputMetricsSuite.scala -- diff --git a/core/src/test/scala/org/apache/spark/metrics/InputOutputMetricsSuite.scala b/core/src/test/scala/org/apache/spark/metrics/InputOutputMetricsSuite.scala index ef3e213..60dba3b 100644 --- a/core/src/test/scala/org/apache/spark/metrics/InputOutputMetricsSuite.scala +++ b/core/src/test/scala/org/apache/spark/metrics/InputOutputMetricsSuite.scala @@ -263,7 +263,7 @@ class InputOutputMetricsSuite extends FunSuite with SharedSparkContext val tmpRdd = sc.textFile(tmpFilePath, numPartitions) -val firstSize= runAndReturnBytesRead { +val firstSize = runAndReturnBytesRead { aRdd.count() } val secondSize = runAndReturnBytesRead { @@ -433,10 +433,10 @@ class OldCombineTextRecordReaderWrapper( /** * Hadoop 2 has a version of this, but we can't use it for backwards compatibility */ -class NewCombineTextInputFormat extends NewCombineFileInputFormat[LongWritable,Text] { +class NewCombineTextInputFormat extends NewCombineFileInputFormat[LongWritable, Text] { def createRecordReader(split: NewInputSplit, context: TaskAttemptContext) : NewRecordReader[LongWritable, Text] = { -new NewCombineFileRecordReader[LongWritable,Text](split.asInstanceOf[NewCombineFileSplit], +new NewCombineFileRecordReader[LongWritable, Text](split.asInstanceOf[NewCombineFileSplit], context, classOf[NewCombineTextRecordReaderWrapper]) } } http://git-wip-us.apache.org/repos/asf/spark/blob/7f7505d8/core/src/test/scala/org/apache/spark/rdd/PairRDDFunctionsSuite.scala -- diff --git a/core/src/test/scala/org/apache/spark/rdd/PairRDDFunctionsSuite.scala b/core/src/test/scala/org/apache/spark/rdd/PairRDDFunctionsSuite.scala index ca0d953..6564232 100644 --- a/core/src/test/scala/org/apache/spark/rdd/PairRDDFunctionsSuite.scala +++ 
b/core/src/test/scala/org/apache/spark/rdd/PairRDDFunctionsSuite.scala @@ -512,17 +512,17 @@ class PairRDDFunctionsSuite extends FunSuite with SharedSparkContext { } test("lookup") { -val pairs = sc.parallelize(Array((1,2), (3,4), (5,6), (5,7))) +val pairs = sc.parallelize(Array((1, 2), (3, 4), (5, 6), (5, 7))) assert(pairs.partitioner === None) assert(pairs.lookup(1) === Seq(2)) -assert(pairs.lookup(5) === Seq(6,7)) +assert(pairs.lookup(5) === Seq(6, 7)) assert(pairs.lookup(-1) === Seq()) } test("lookup with partitioner") { -val pairs = sc.parallelize(Array((1,2), (3,4), (5,6), (5,7))) +val pairs = sc.parallelize(Array((1, 2), (3, 4), (5, 6), (5, 7))) val p = new Partitioner { def numPartitions: Int = 2 @@ -533,12 +533,12 @@ class PairRDDFunctionsSuite extends FunSuite with SharedSparkContext { assert(shuffled.partitioner === Some(p)) assert(shuffled.lookup(1) === Seq(2)) -assert(shuffled.lookup(5) === Seq(6,7)) +assert(shuffled.lookup(5) ===
spark git commit: [SPARK-7198] [MLLIB] VectorAssembler should output ML attributes
Repository: spark Updated Branches: refs/heads/branch-1.4 0a65224ae -> b9bdf12a1 [SPARK-7198] [MLLIB] VectorAssembler should output ML attributes `VectorAssembler` should carry over ML attributes. For unknown attributes, we assume numeric values. This PR handles the following cases: 1. DoubleType with ML attribute: carry over 2. DoubleType without ML attribute: numeric value 3. Scalar type: numeric value 4. VectorType with all ML attributes: carry over and update names 5. VectorType with number of ML attributes: assume all numeric 6. VectorType without ML attributes: check the first row and get the number of attributes jkbradley Author: Xiangrui Meng m...@databricks.com Closes #6452 from mengxr/SPARK-7198 and squashes the following commits: a9d2469 [Xiangrui Meng] add space facdb1f [Xiangrui Meng] VectorAssembler should output ML attributes (cherry picked from commit 7859ab659eecbcf2d8b9a274a4e9e4f5186a528c) Signed-off-by: Joseph K. Bradley jos...@databricks.com Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b9bdf12a Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b9bdf12a Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b9bdf12a Branch: refs/heads/branch-1.4 Commit: b9bdf12a1c2ea81cfaae7df540670c34d028838d Parents: 0a65224 Author: Xiangrui Meng m...@databricks.com Authored: Thu May 28 16:32:51 2015 -0700 Committer: Joseph K. Bradley jos...@databricks.com Committed: Thu May 28 16:32:59 2015 -0700 -- .../spark/ml/feature/VectorAssembler.scala | 51 ++-- .../spark/ml/feature/VectorAssemblerSuite.scala | 37 ++ 2 files changed, 83 insertions(+), 5 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/b9bdf12a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala -- diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala index 514ffb0..229ee27 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala @@ -22,6 +22,7 @@ import scala.collection.mutable.ArrayBuilder import org.apache.spark.SparkException import org.apache.spark.annotation.Experimental import org.apache.spark.ml.Transformer +import org.apache.spark.ml.attribute.{Attribute, AttributeGroup, NumericAttribute, UnresolvedAttribute} import org.apache.spark.ml.param.shared._ import org.apache.spark.ml.util.Identifiable import org.apache.spark.mllib.linalg.{Vector, VectorUDT, Vectors} @@ -37,7 +38,7 @@ class VectorAssembler(override val uid: String) extends Transformer with HasInputCols with HasOutputCol { - def this() = this(Identifiable.randomUID("va")) + def this() = this(Identifiable.randomUID("vecAssembler")) /** @group setParam */ def setInputCols(value: Array[String]): this.type = set(inputCols, value) @@ -46,19 +47,59 @@ class VectorAssembler(override val uid: String) def setOutputCol(value: String): this.type = set(outputCol, value) override def transform(dataset: DataFrame): DataFrame = { +// Schema transformation. +val schema = dataset.schema +lazy val first = dataset.first() +val attrs = $(inputCols).flatMap { c => + val field = schema(c) + val index = schema.fieldIndex(c) + field.dataType match { +case DoubleType => + val attr = Attribute.fromStructField(field) + // If the input column doesn't have ML attribute, assume numeric. 
+ if (attr == UnresolvedAttribute) { +Some(NumericAttribute.defaultAttr.withName(c)) + } else { +Some(attr.withName(c)) + } +case _: NumericType | BooleanType => + // If the input column type is a compatible scalar type, assume numeric. + Some(NumericAttribute.defaultAttr.withName(c)) +case _: VectorUDT => + val group = AttributeGroup.fromStructField(field) + if (group.attributes.isDefined) { +// If attributes are defined, copy them with updated names. +group.attributes.get.map { attr => + if (attr.name.isDefined) { +// TODO: Define a rigorous naming scheme. +attr.withName(c + "_" + attr.name.get) + } else { +attr + } +} + } else { +// Otherwise, treat all attributes as numeric. If we cannot get the number of attributes +// from metadata, check the first row. +val numAttrs = group.numAttributes.getOrElse(first.getAs[Vector](index).size) +
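A minimal sketch of the assembler whose attribute handling this patch fills in (the DataFrame df and its column names are hypothetical):

    import org.apache.spark.ml.feature.VectorAssembler

    // Concatenate numeric, boolean, and vector columns into one feature
    // vector; with this patch the output column also carries per-slot
    // ML attributes derived from the input column names.
    val assembler = new VectorAssembler()
      .setInputCols(Array("age", "income", "clicks"))
      .setOutputCol("features")
    val assembled = assembler.transform(df)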
spark git commit: Remove SizeEstimator from o.a.spark package.
Repository: spark Updated Branches: refs/heads/master 7859ab659 -> 0077af22c Remove SizeEstimator from o.a.spark package. See comments on https://github.com/apache/spark/pull/3913 Author: Reynold Xin r...@databricks.com Closes #6471 from rxin/sizeestimator and squashes the following commits: c057095 [Reynold Xin] Fixed import. 2da478b [Reynold Xin] Remove SizeEstimator from o.a.spark package. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0077af22 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0077af22 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0077af22 Branch: refs/heads/master Commit: 0077af22ca5fcb2e50dcf7daa4f6804ae722bfbe Parents: 7859ab6 Author: Reynold Xin r...@databricks.com Authored: Thu May 28 16:56:59 2015 -0700 Committer: Reynold Xin r...@databricks.com Committed: Thu May 28 16:56:59 2015 -0700 -- .../scala/org/apache/spark/SizeEstimator.scala | 44 .../org/apache/spark/util/SizeEstimator.scala | 20 +++-- 2 files changed, 17 insertions(+), 47 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/0077af22/core/src/main/scala/org/apache/spark/SizeEstimator.scala -- diff --git a/core/src/main/scala/org/apache/spark/SizeEstimator.scala b/core/src/main/scala/org/apache/spark/SizeEstimator.scala deleted file mode 100644 index 54fc3a8..000 --- a/core/src/main/scala/org/apache/spark/SizeEstimator.scala +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - *http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark - -import org.apache.spark.annotation.DeveloperApi - -/** - * Estimates the sizes of Java objects (number of bytes of memory they occupy), for use in - * memory-aware caches. - * - * Based on the following JavaWorld article: - * http://www.javaworld.com/javaworld/javaqa/2003-12/02-qa-1226-sizeof.html - */ -@DeveloperApi -object SizeEstimator { - /** - * :: DeveloperApi :: - * Estimate the number of bytes that the given object takes up on the JVM heap. The estimate - * includes space taken up by objects referenced by the given object, their references, and so on - * and so forth. - * - * This is useful for determining the amount of heap space a broadcast variable will occupy on - * each executor or the amount of space each object will take when caching objects in - * deserialized form. This is not the same as the serialized size of the object, which will - * typically be much smaller. 
- */ - @DeveloperApi - def estimate(obj: AnyRef): Long = org.apache.spark.util.SizeEstimator.estimate(obj) -} http://git-wip-us.apache.org/repos/asf/spark/blob/0077af22/core/src/main/scala/org/apache/spark/util/SizeEstimator.scala -- diff --git a/core/src/main/scala/org/apache/spark/util/SizeEstimator.scala b/core/src/main/scala/org/apache/spark/util/SizeEstimator.scala index 968a72d..f38949c 100644 --- a/core/src/main/scala/org/apache/spark/util/SizeEstimator.scala +++ b/core/src/main/scala/org/apache/spark/util/SizeEstimator.scala @@ -21,21 +21,37 @@ import java.lang.management.ManagementFactory import java.lang.reflect.{Field, Modifier} import java.util.{IdentityHashMap, Random} import java.util.concurrent.ConcurrentHashMap + import scala.collection.mutable.ArrayBuffer import scala.runtime.ScalaRunTime import org.apache.spark.Logging +import org.apache.spark.annotation.DeveloperApi import org.apache.spark.util.collection.OpenHashSet /** + * :: DeveloperApi :: * Estimates the sizes of Java objects (number of bytes of memory they occupy), for use in * memory-aware caches. * * Based on the following JavaWorld article: * http://www.javaworld.com/javaworld/javaqa/2003-12/02-qa-1226-sizeof.html */ -private[spark] object SizeEstimator extends Logging { +@DeveloperApi +object SizeEstimator extends Logging { + + /** + * Estimate the number of bytes that the given object takes up on the JVM heap.
spark git commit: [SPARK-7927] whitespace fixes for Hive and ThriftServer.
Repository: spark Updated Branches: refs/heads/master 3af0b3136 -> ee6a0e12f [SPARK-7927] whitespace fixes for Hive and ThriftServer. So we can enable a whitespace enforcement rule in the style checker to save code review time. Author: Reynold Xin r...@databricks.com Closes #6478 from rxin/whitespace-hive and squashes the following commits: e01b0e0 [Reynold Xin] Fixed tests. a3bba22 [Reynold Xin] [SPARK-7927] whitespace fixes for Hive and ThriftServer. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ee6a0e12 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ee6a0e12 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ee6a0e12 Branch: refs/heads/master Commit: ee6a0e12fb76e4d5c24175900e5bf6a8cb35e2b0 Parents: 3af0b31 Author: Reynold Xin r...@databricks.com Authored: Thu May 28 18:08:56 2015 -0700 Committer: Reynold Xin r...@databricks.com Committed: Thu May 28 18:08:56 2015 -0700 -- .../spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala | 8 .../spark/sql/hive/thriftserver/ui/ThriftServerPage.scala | 6 +++--- .../hive/thriftserver/ui/ThriftServerSessionPage.scala| 2 +- .../spark/sql/hive/thriftserver/UISeleniumSuite.scala | 4 ++-- .../org/apache/spark/sql/hive/ExtendedHiveQlParser.scala | 6 +++--- .../scala/org/apache/spark/sql/hive/HiveContext.scala | 4 ++-- .../scala/org/apache/spark/sql/hive/HiveInspectors.scala | 10 +- .../org/apache/spark/sql/hive/HiveMetastoreCatalog.scala | 10 +++--- .../src/main/scala/org/apache/spark/sql/hive/HiveQl.scala | 9 + .../spark/sql/hive/execution/InsertIntoHiveTable.scala| 7 +++ .../spark/sql/hive/execution/ScriptTransformation.scala | 2 +- .../main/scala/org/apache/spark/sql/hive/hiveUdfs.scala | 6 +++--- .../org/apache/spark/sql/hive/hiveWriterContainers.scala | 2 +- .../scala/org/apache/spark/sql/hive/test/TestHive.scala | 6 +++--- 14 files changed, 43 insertions(+), 39 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/ee6a0e12/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala -- diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala index deb1008..14f6f65 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala @@ -43,7 +43,7 @@ import org.apache.spark.util.Utils private[hive] object SparkSQLCLIDriver { private var prompt = "spark-sql" private var continuedPrompt = "".padTo(prompt.length, ' ') - private var transport:TSocket = _ + private var transport: TSocket = _ installSignalHandler() @@ -276,13 +276,13 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging { driver.init() val out = sessionState.out - val start:Long = System.currentTimeMillis() + val start: Long = System.currentTimeMillis() if (sessionState.getIsVerbose) { out.println(cmd) } val rc = driver.run(cmd) val end = System.currentTimeMillis() - val timeTaken:Double = (end - start) / 1000.0 + val timeTaken: Double = (end - start) / 1000.0 ret = rc.getResponseCode if (ret != 0) { @@ -310,7 +310,7 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging { res.clear() } } catch { -case e:IOException => +case e: IOException => console.printError( s"""Failed with exception ${e.getClass.getName}: ${e.getMessage} 
|${org.apache.hadoop.util.StringUtils.stringifyException(e)} http://git-wip-us.apache.org/repos/asf/spark/blob/ee6a0e12/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerPage.scala -- diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerPage.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerPage.scala index 7c48ff4..10c83d8 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerPage.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerPage.scala @@ -77,7 +77,7 @@ private[ui] class ThriftServerPage(parent: ThriftServerTab) extends
spark git commit: [SPARK-7927] whitespace fixes for Hive and ThriftServer.
Repository: spark Updated Branches: refs/heads/branch-1.4 f4b135337 - 3b38c06f0 [SPARK-7927] whitespace fixes for Hive and ThriftServer. So we can enable a whitespace enforcement rule in the style checker to save code review time. Author: Reynold Xin r...@databricks.com Closes #6478 from rxin/whitespace-hive and squashes the following commits: e01b0e0 [Reynold Xin] Fixed tests. a3bba22 [Reynold Xin] [SPARK-7927] whitespace fixes for Hive and ThriftServer. (cherry picked from commit ee6a0e12fb76e4d5c24175900e5bf6a8cb35e2b0) Signed-off-by: Reynold Xin r...@databricks.com Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3b38c06f Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3b38c06f Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3b38c06f Branch: refs/heads/branch-1.4 Commit: 3b38c06f0d19bd0d15df768d6ae0037f6c04b88d Parents: f4b1353 Author: Reynold Xin r...@databricks.com Authored: Thu May 28 18:08:56 2015 -0700 Committer: Reynold Xin r...@databricks.com Committed: Thu May 28 18:09:09 2015 -0700 -- .../spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala | 8 .../spark/sql/hive/thriftserver/ui/ThriftServerPage.scala | 6 +++--- .../hive/thriftserver/ui/ThriftServerSessionPage.scala| 2 +- .../spark/sql/hive/thriftserver/UISeleniumSuite.scala | 4 ++-- .../org/apache/spark/sql/hive/ExtendedHiveQlParser.scala | 6 +++--- .../scala/org/apache/spark/sql/hive/HiveContext.scala | 4 ++-- .../scala/org/apache/spark/sql/hive/HiveInspectors.scala | 10 +- .../org/apache/spark/sql/hive/HiveMetastoreCatalog.scala | 10 +++--- .../src/main/scala/org/apache/spark/sql/hive/HiveQl.scala | 9 + .../spark/sql/hive/execution/InsertIntoHiveTable.scala| 7 +++ .../spark/sql/hive/execution/ScriptTransformation.scala | 2 +- .../main/scala/org/apache/spark/sql/hive/hiveUdfs.scala | 6 +++--- .../org/apache/spark/sql/hive/hiveWriterContainers.scala | 2 +- .../scala/org/apache/spark/sql/hive/test/TestHive.scala | 6 +++--- 14 files changed, 43 insertions(+), 39 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/3b38c06f/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala -- diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala index deb1008..14f6f65 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala @@ -43,7 +43,7 @@ import org.apache.spark.util.Utils private[hive] object SparkSQLCLIDriver { private var prompt = spark-sql private var continuedPrompt = .padTo(prompt.length, ' ') - private var transport:TSocket = _ + private var transport: TSocket = _ installSignalHandler() @@ -276,13 +276,13 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging { driver.init() val out = sessionState.out - val start:Long = System.currentTimeMillis() + val start: Long = System.currentTimeMillis() if (sessionState.getIsVerbose) { out.println(cmd) } val rc = driver.run(cmd) val end = System.currentTimeMillis() - val timeTaken:Double = (end - start) / 1000.0 + val timeTaken: Double = (end - start) / 1000.0 ret = rc.getResponseCode if (ret != 0) { @@ -310,7 +310,7 @@ private[hive] class SparkSQLCLIDriver extends CliDriver with Logging { res.clear() } } catch { 
-case e:IOException => +case e: IOException => console.printError( s"""Failed with exception ${e.getClass.getName}: ${e.getMessage} |${org.apache.hadoop.util.StringUtils.stringifyException(e)} http://git-wip-us.apache.org/repos/asf/spark/blob/3b38c06f/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerPage.scala -- diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerPage.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerPage.scala index 7c48ff4..10c83d8 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/ui/ThriftServerPage.scala +++
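(For context on the style-checker rule these two commits enable: scalastyle rules are declared in scalastyle-config.xml. The snippet below is only a hedged sketch of what such a whitespace rule looks like — EnsureSingleSpaceAfterTokenChecker is a real scalastyle checker, but the exact rule and token list Spark enabled are assumptions here, not taken from these commits.)
~~~
<!-- Sketch only: require a single space after tokens such as ':' and ','.
     The token list below is an assumption, not Spark's actual configuration. -->
<check level="error" class="org.scalastyle.scalariform.EnsureSingleSpaceAfterTokenChecker" enabled="true">
  <parameters>
    <parameter name="tokens">ARROW, COMMA, COLON, IF, ELSE, WHILE, FOR, MATCH, TRY, CATCH, FINALLY</parameter>
  </parameters>
</check>
~~~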
spark git commit: [SPARK-7826] [CORE] Suppress extra calling getCacheLocs.
Repository: spark Updated Branches: refs/heads/master 66c49ed60 -> 9b692bfdf [SPARK-7826] [CORE] Suppress extra calling getCacheLocs. The DAGScheduler makes many redundant calls to the `getCacheLocs` method, each of which involves Akka communication. To improve `DAGScheduler` performance, this patch suppresses the redundant calls. In my application, with over 1200 stages, the execution time dropped from 8.5 min to 3.8 min with this patch. Author: Takuya UESHIN ues...@happy-camper.st Closes #6352 from ueshin/issues/SPARK-7826 and squashes the following commits: 3d4d036 [Takuya UESHIN] Modify a test and the documentation. 10b1b22 [Takuya UESHIN] Simplify the unit test. d858b59 [Takuya UESHIN] Move the storageLevel check inside the if (!cacheLocs.contains(rdd.id)) block. 6f3125c [Takuya UESHIN] Fix scalastyle. b9c835c [Takuya UESHIN] Put the condition that checks if the RDD has uncached partitions into a variable for readability. f87f2ec [Takuya UESHIN] Get cached locations from block manager only if the storage level of the RDD is not StorageLevel.NONE. 8248386 [Takuya UESHIN] Revert "Suppress extra calling getCacheLocs." a4d944a [Takuya UESHIN] Add a unit test. 9a80fad [Takuya UESHIN] Suppress extra calling getCacheLocs. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/9b692bfd Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/9b692bfd Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/9b692bfd Branch: refs/heads/master Commit: 9b692bfdfcc91b32498865d21138cf215a378665 Parents: 66c49ed Author: Takuya UESHIN ues...@happy-camper.st Authored: Thu May 28 19:05:12 2015 -0700 Committer: Kay Ousterhout kayousterh...@gmail.com Committed: Thu May 28 19:05:12 2015 -0700 -- .../apache/spark/scheduler/DAGScheduler.scala | 15 ++--- .../spark/scheduler/DAGSchedulerSuite.scala | 35 +--- 2 files changed, 42 insertions(+), 8 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/9b692bfd/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala -- diff --git a/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala index a083be2..a2299e9 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala @@ -193,9 +193,15 @@ class DAGScheduler( def getCacheLocs(rdd: RDD[_]): Seq[Seq[TaskLocation]] = cacheLocs.synchronized { // Note: this doesn't use `getOrElse()` because this method is called O(num tasks) times if (!cacheLocs.contains(rdd.id)) { - val blockIds = rdd.partitions.indices.map(index => RDDBlockId(rdd.id, index)).toArray[BlockId] - val locs: Seq[Seq[TaskLocation]] = blockManagerMaster.getLocations(blockIds).map { bms => -bms.map(bm => TaskLocation(bm.host, bm.executorId)) + // Note: if the storage level is NONE, we don't need to get locations from block manager.
+ val locs: Seq[Seq[TaskLocation]] = if (rdd.getStorageLevel == StorageLevel.NONE) { +Seq.fill(rdd.partitions.size)(Nil) + } else { +val blockIds = + rdd.partitions.indices.map(index => RDDBlockId(rdd.id, index)).toArray[BlockId] +blockManagerMaster.getLocations(blockIds).map { bms => + bms.map(bm => TaskLocation(bm.host, bm.executorId)) +} } cacheLocs(rdd.id) = locs } @@ -382,7 +388,8 @@ class DAGScheduler( def visit(rdd: RDD[_]) { if (!visited(rdd)) { visited += rdd -if (getCacheLocs(rdd).contains(Nil)) { +val rddHasUncachedPartitions = getCacheLocs(rdd).contains(Nil) +if (rddHasUncachedPartitions) { for (dep <- rdd.dependencies) { dep match { case shufDep: ShuffleDependency[_, _, _] => http://git-wip-us.apache.org/repos/asf/spark/blob/9b692bfd/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala -- diff --git a/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala index 6a8ae29..4664223 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala @@ -318,7 +318,7 @@ class DAGSchedulerSuite } test("cache location preferences w/ dependency") { -val baseRdd = new MyRDD(sc, 1, Nil) +val baseRdd = new MyRDD(sc, 1, Nil).cache() val finalRdd = new MyRDD(sc, 1, List(new OneToOneDependency(baseRdd))) cacheLocations(baseRdd.id -> 0) = Seq(makeBlockManagerId("hostA"),
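(The pattern behind this fix — compute cache locations once per RDD, and short-circuit entirely when the RDD is not persisted — can be sketched standalone. This is a minimal sketch with simplified types, not the actual DAGScheduler code; `lookupFromBlockManager` stands in for the expensive RPC to the block manager master.)
~~~
import scala.collection.mutable

// Sketch of the memoization pattern in getCacheLocs (assumed, simplified types).
class CacheLocs(lookupFromBlockManager: Int => Seq[Seq[String]]) {
  private val cacheLocs = new mutable.HashMap[Int, Seq[Seq[String]]]

  def getCacheLocs(rddId: Int, numPartitions: Int, isPersisted: Boolean): Seq[Seq[String]] =
    cacheLocs.synchronized {
      if (!cacheLocs.contains(rddId)) {
        cacheLocs(rddId) =
          if (!isPersisted) Seq.fill(numPartitions)(Nil) // StorageLevel.NONE: skip the RPC
          else lookupFromBlockManager(rddId)             // one RPC per RDD, not per task
      }
      cacheLocs(rddId)
    }
}
~~~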
spark git commit: [SPARK-7927] whitespace fixes for SQL core.
Repository: spark Updated Branches: refs/heads/branch-1.4 0c0511506 - 9b97e95e8 [SPARK-7927] whitespace fixes for SQL core. So we can enable a whitespace enforcement rule in the style checker to save code review time. Author: Reynold Xin r...@databricks.com Closes #6477 from rxin/whitespace-sql-core and squashes the following commits: ce6e369 [Reynold Xin] Fixed tests. 6095fed [Reynold Xin] [SPARK-7927] whitespace fixes for SQL core. (cherry picked from commit ff44c711abc7ca545dfa1e836279c00fe7539c18) Signed-off-by: Reynold Xin r...@databricks.com Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/9b97e95e Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/9b97e95e Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/9b97e95e Branch: refs/heads/branch-1.4 Commit: 9b97e95e86f0d11e8ae3ba55432c726cec79d5bc Parents: 0c05115 Author: Reynold Xin r...@databricks.com Authored: Thu May 28 20:10:21 2015 -0700 Committer: Reynold Xin r...@databricks.com Committed: Thu May 28 20:10:28 2015 -0700 -- .../scala/org/apache/spark/sql/Column.scala | 4 +- .../scala/org/apache/spark/sql/DataFrame.scala | 18 .../org/apache/spark/sql/DataFrameHolder.scala | 2 +- .../org/apache/spark/sql/GroupedData.scala | 10 ++--- .../scala/org/apache/spark/sql/SQLContext.scala | 2 +- .../org/apache/spark/sql/SparkSQLParser.scala | 18 .../columnar/InMemoryColumnarTableScan.scala| 2 +- .../apache/spark/sql/execution/Exchange.scala | 2 +- .../spark/sql/execution/SparkStrategies.scala | 7 ++-- .../joins/BroadcastLeftSemiJoinHash.scala | 2 +- .../sql/execution/stat/FrequentItems.scala | 4 +- .../scala/org/apache/spark/sql/functions.scala | 2 +- .../org/apache/spark/sql/jdbc/JDBCRDD.scala | 44 ++-- .../org/apache/spark/sql/json/InferSchema.scala | 2 +- .../spark/sql/json/JacksonGenerator.scala | 10 ++--- .../org/apache/spark/sql/json/JsonRDD.scala | 8 ++-- .../spark/sql/parquet/ParquetConverter.scala| 12 +++--- .../sql/parquet/ParquetTableOperations.scala| 4 +- .../apache/spark/sql/parquet/ParquetTypes.scala | 2 +- .../org/apache/spark/sql/sources/ddl.scala | 8 ++-- .../spark/sql/ColumnExpressionSuite.scala | 14 +++ .../spark/sql/DataFrameAggregateSuite.scala | 4 +- .../org/apache/spark/sql/DataFrameSuite.scala | 38 - .../scala/org/apache/spark/sql/JoinSuite.scala | 8 ++-- .../org/apache/spark/sql/ListTablesSuite.scala | 2 +- .../org/apache/spark/sql/SQLQuerySuite.scala| 42 +-- .../sql/ScalaReflectionRelationSuite.scala | 4 +- .../scala/org/apache/spark/sql/TestData.scala | 4 +- .../scala/org/apache/spark/sql/UDFSuite.scala | 2 +- .../spark/sql/columnar/ColumnTypeSuite.scala| 6 +-- .../compression/DictionaryEncodingSuite.scala | 4 +- .../compression/IntegralDeltaSuite.scala| 4 +- .../compression/RunLengthEncodingSuite.scala| 10 ++--- .../org/apache/spark/sql/jdbc/JDBCSuite.scala | 4 +- .../org/apache/spark/sql/json/JsonSuite.scala | 2 +- .../apache/spark/sql/sources/DDLTestSuite.scala | 5 +-- .../spark/sql/sources/FilteredScanSuite.scala | 2 +- 37 files changed, 160 insertions(+), 158 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/9b97e95e/sql/core/src/main/scala/org/apache/spark/sql/Column.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala index 6895aa1..b49b1d3 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala @@ -349,7 +349,7 @@ class Column(protected[sql] val expr: 
Expression) extends Logging { * @group expr_ops * @since 1.4.0 */ - def when(condition: Column, value: Any):Column = this.expr match { + def when(condition: Column, value: Any): Column = this.expr match { case CaseWhen(branches: Seq[Expression]) => CaseWhen(branches ++ Seq(lit(condition).expr, lit(value).expr)) case _ => @@ -378,7 +378,7 @@ class Column(protected[sql] val expr: Expression) extends Logging { * @group expr_ops * @since 1.4.0 */ - def otherwise(value: Any):Column = this.expr match { + def otherwise(value: Any): Column = this.expr match { case CaseWhen(branches: Seq[Expression]) => if (branches.size % 2 == 0) { CaseWhen(branches :+ lit(value).expr) http://git-wip-us.apache.org/repos/asf/spark/blob/9b97e95e/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala -- diff --git
spark git commit: [SPARK-7927] whitespace fixes for Catalyst module.
Repository: spark Updated Branches: refs/heads/master 2881d14cb - 8da560d7d [SPARK-7927] whitespace fixes for Catalyst module. So we can enable a whitespace enforcement rule in the style checker to save code review time. Author: Reynold Xin r...@databricks.com Closes #6476 from rxin/whitespace-catalyst and squashes the following commits: 650409d [Reynold Xin] Fixed tests. 51a9e5d [Reynold Xin] [SPARK-7927] whitespace fixes for Catalyst module. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/8da560d7 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/8da560d7 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/8da560d7 Branch: refs/heads/master Commit: 8da560d7de9b3c9a3e3ff197eeb10a3d7023f10d Parents: 2881d14 Author: Reynold Xin r...@databricks.com Authored: Thu May 28 20:11:57 2015 -0700 Committer: Reynold Xin r...@databricks.com Committed: Thu May 28 20:11:57 2015 -0700 -- .../sql/catalyst/AbstractSparkSQLParser.scala | 2 +- .../apache/spark/sql/catalyst/SqlParser.scala | 8 +- .../spark/sql/catalyst/analysis/Analyzer.scala | 9 +-- .../spark/sql/catalyst/analysis/Catalog.scala | 2 +- .../catalyst/analysis/HiveTypeCoercion.scala| 5 +- .../apache/spark/sql/catalyst/dsl/package.scala | 2 +- .../spark/sql/catalyst/errors/package.scala | 7 -- .../spark/sql/catalyst/expressions/Cast.scala | 84 ++-- .../sql/catalyst/expressions/ExtractValue.scala | 2 +- .../sql/catalyst/expressions/aggregates.scala | 4 +- .../expressions/codegen/CodeGenerator.scala | 4 +- .../codegen/GenerateProjection.scala| 2 +- .../sql/catalyst/expressions/generators.scala | 4 +- .../catalyst/expressions/stringOperations.scala | 4 +- .../expressions/windowExpressions.scala | 2 +- .../spark/sql/catalyst/plans/QueryPlan.scala| 4 +- .../catalyst/plans/logical/basicOperators.scala | 2 +- .../spark/sql/catalyst/trees/TreeNode.scala | 4 +- .../spark/sql/catalyst/util/package.scala | 2 +- .../org/apache/spark/sql/types/DataType.scala | 2 +- .../sql/catalyst/ScalaReflectionSuite.scala | 2 +- .../spark/sql/catalyst/SqlParserSuite.scala | 4 +- .../sql/catalyst/analysis/AnalysisSuite.scala | 2 +- .../expressions/ExpressionEvaluationSuite.scala | 56 ++--- .../expressions/GeneratedEvaluationSuite.scala | 6 +- .../GeneratedMutableEvaluationSuite.scala | 2 +- .../optimizer/BooleanSimplificationSuite.scala | 4 +- .../optimizer/FilterPushdownSuite.scala | 2 +- .../catalyst/optimizer/OptimizeInSuite.scala| 6 +- .../catalyst/optimizer/UnionPushdownSuite.scala | 4 +- .../sql/catalyst/trees/TreeNodeSuite.scala | 4 +- .../apache/spark/sql/types/DataTypeSuite.scala | 4 +- 32 files changed, 121 insertions(+), 130 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/8da560d7/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/AbstractSparkSQLParser.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/AbstractSparkSQLParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/AbstractSparkSQLParser.scala index 2eb3e16..ef7b3ad 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/AbstractSparkSQLParser.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/AbstractSparkSQLParser.scala @@ -103,7 +103,7 @@ class SqlLexical extends StdLexical { ( identChar ~ (identChar | digit).* ^^ { case first ~ rest = processIdent((first :: rest).mkString) } | rep1(digit) ~ ('.' ~ digit.*).? 
^^ { -case i ~ None=> NumericLit(i.mkString) +case i ~ None => NumericLit(i.mkString) case i ~ Some(d) => FloatLit(i.mkString + "." + d.mkString) } | '\'' ~ chrExcept('\'', '\n', EofCh).* ~ '\'' ^^ http://git-wip-us.apache.org/repos/asf/spark/blob/8da560d7/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala index fc36b9f..e85312a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala @@ -140,7 +140,7 @@ class SqlParser extends AbstractSparkSQLParser with DataTypeParser { (HAVING ~ expression).? ~ sortType.? ~ (LIMIT ~ expression).? ^^ { -case d ~ p ~ r ~ f ~ g ~ h ~ o ~ l => +case d ~ p ~ r ~ f ~ g ~ h ~ o ~ l => val base = r.getOrElse(OneRowRelation) val
spark git commit: [SPARK-7929] Remove Bagel examples whitespace fix for examples.
Repository: spark Updated Branches: refs/heads/master ff44c711a - 2881d14cb [SPARK-7929] Remove Bagel examples whitespace fix for examples. Author: Reynold Xin r...@databricks.com Closes #6480 from rxin/whitespace-example and squashes the following commits: 8a4a3d4 [Reynold Xin] [SPARK-7929] Remove Bagel examples whitespace fix for examples. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2881d14c Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2881d14c Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2881d14c Branch: refs/heads/master Commit: 2881d14cbedc14f1cd8ae5078446dba1a8d39086 Parents: ff44c71 Author: Reynold Xin r...@databricks.com Authored: Thu May 28 20:11:04 2015 -0700 Committer: Reynold Xin r...@databricks.com Committed: Thu May 28 20:11:04 2015 -0700 -- .../spark/examples/CassandraCQLTest.scala | 6 +- .../org/apache/spark/examples/LocalLR.scala | 2 +- .../org/apache/spark/examples/SparkALS.scala| 2 +- .../org/apache/spark/examples/SparkLR.scala | 2 +- .../spark/examples/bagel/PageRankUtils.scala| 112 - .../examples/bagel/WikipediaPageRank.scala | 106 - .../bagel/WikipediaPageRankStandalone.scala | 232 --- .../spark/examples/ml/OneVsRestExample.scala| 6 +- .../examples/mllib/DenseGaussianMixture.scala | 2 +- .../pythonconverters/AvroConverters.scala | 29 ++- .../examples/streaming/ActorWordCount.scala | 6 +- .../streaming/DirectKafkaWordCount.scala| 2 +- .../examples/streaming/KafkaWordCount.scala | 4 +- .../examples/streaming/MQTTWordCount.scala | 2 +- .../clickstream/PageViewGenerator.scala | 3 +- 15 files changed, 31 insertions(+), 485 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/2881d14c/examples/src/main/scala/org/apache/spark/examples/CassandraCQLTest.scala -- diff --git a/examples/src/main/scala/org/apache/spark/examples/CassandraCQLTest.scala b/examples/src/main/scala/org/apache/spark/examples/CassandraCQLTest.scala index 11d5c92..023bb3e 100644 --- a/examples/src/main/scala/org/apache/spark/examples/CassandraCQLTest.scala +++ b/examples/src/main/scala/org/apache/spark/examples/CassandraCQLTest.scala @@ -104,8 +104,8 @@ object CassandraCQLTest { val casRdd = sc.newAPIHadoopRDD(job.getConfiguration(), classOf[CqlPagingInputFormat], - classOf[java.util.Map[String,ByteBuffer]], - classOf[java.util.Map[String,ByteBuffer]]) + classOf[java.util.Map[String, ByteBuffer]], + classOf[java.util.Map[String, ByteBuffer]]) println(Count: + casRdd.count) val productSaleRDD = casRdd.map { @@ -118,7 +118,7 @@ object CassandraCQLTest { case (productId, saleCount) = println(productId + : + saleCount) } -val casoutputCF = aggregatedRDD.map { +val casoutputCF = aggregatedRDD.map { case (productId, saleCount) = { val outColFamKey = Map(prod_id - ByteBufferUtil.bytes(productId)) val outKey: java.util.Map[String, ByteBuffer] = outColFamKey http://git-wip-us.apache.org/repos/asf/spark/blob/2881d14c/examples/src/main/scala/org/apache/spark/examples/LocalLR.scala -- diff --git a/examples/src/main/scala/org/apache/spark/examples/LocalLR.scala b/examples/src/main/scala/org/apache/spark/examples/LocalLR.scala index a55e0dc..c3fc74a 100644 --- a/examples/src/main/scala/org/apache/spark/examples/LocalLR.scala +++ b/examples/src/main/scala/org/apache/spark/examples/LocalLR.scala @@ -39,7 +39,7 @@ object LocalLR { def generateData: Array[DataPoint] = { def generatePoint(i: Int): DataPoint = { - val y = if(i % 2 == 0) -1 else 1 + val y = if (i % 2 == 0) -1 else 1 val x = 
DenseVector.fill(D){rand.nextGaussian + y * R} DataPoint(x, y) } http://git-wip-us.apache.org/repos/asf/spark/blob/2881d14c/examples/src/main/scala/org/apache/spark/examples/SparkALS.scala -- diff --git a/examples/src/main/scala/org/apache/spark/examples/SparkALS.scala b/examples/src/main/scala/org/apache/spark/examples/SparkALS.scala index 6c0ac80..30c4261 100644 --- a/examples/src/main/scala/org/apache/spark/examples/SparkALS.scala +++ b/examples/src/main/scala/org/apache/spark/examples/SparkALS.scala @@ -117,7 +117,7 @@ object SparkALS { var us = Array.fill(U)(randomVector(F)) // Iteratively update movies then users -val Rc = sc.broadcast(R) +val Rc = sc.broadcast(R) var msb = sc.broadcast(ms) var usb = sc.broadcast(us) for (iter - 1 to ITERATIONS) {
spark git commit: [SPARK-7929] Remove Bagel examples whitespace fix for examples.
Repository: spark Updated Branches: refs/heads/branch-1.4 9b97e95e8 - 142ae52d4 [SPARK-7929] Remove Bagel examples whitespace fix for examples. Author: Reynold Xin r...@databricks.com Closes #6480 from rxin/whitespace-example and squashes the following commits: 8a4a3d4 [Reynold Xin] [SPARK-7929] Remove Bagel examples whitespace fix for examples. (cherry picked from commit 2881d14cbedc14f1cd8ae5078446dba1a8d39086) Signed-off-by: Reynold Xin r...@databricks.com Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/142ae52d Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/142ae52d Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/142ae52d Branch: refs/heads/branch-1.4 Commit: 142ae52d4800fdb966b14b8f0753ba7567c55204 Parents: 9b97e95 Author: Reynold Xin r...@databricks.com Authored: Thu May 28 20:11:04 2015 -0700 Committer: Reynold Xin r...@databricks.com Committed: Thu May 28 20:11:11 2015 -0700 -- .../spark/examples/CassandraCQLTest.scala | 6 +- .../org/apache/spark/examples/LocalLR.scala | 2 +- .../org/apache/spark/examples/SparkALS.scala| 2 +- .../org/apache/spark/examples/SparkLR.scala | 2 +- .../spark/examples/bagel/PageRankUtils.scala| 112 - .../examples/bagel/WikipediaPageRank.scala | 106 - .../bagel/WikipediaPageRankStandalone.scala | 232 --- .../spark/examples/ml/OneVsRestExample.scala| 6 +- .../examples/mllib/DenseGaussianMixture.scala | 2 +- .../pythonconverters/AvroConverters.scala | 29 ++- .../examples/streaming/ActorWordCount.scala | 6 +- .../streaming/DirectKafkaWordCount.scala| 2 +- .../examples/streaming/KafkaWordCount.scala | 4 +- .../examples/streaming/MQTTWordCount.scala | 2 +- .../clickstream/PageViewGenerator.scala | 3 +- 15 files changed, 31 insertions(+), 485 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/142ae52d/examples/src/main/scala/org/apache/spark/examples/CassandraCQLTest.scala -- diff --git a/examples/src/main/scala/org/apache/spark/examples/CassandraCQLTest.scala b/examples/src/main/scala/org/apache/spark/examples/CassandraCQLTest.scala index 11d5c92..023bb3e 100644 --- a/examples/src/main/scala/org/apache/spark/examples/CassandraCQLTest.scala +++ b/examples/src/main/scala/org/apache/spark/examples/CassandraCQLTest.scala @@ -104,8 +104,8 @@ object CassandraCQLTest { val casRdd = sc.newAPIHadoopRDD(job.getConfiguration(), classOf[CqlPagingInputFormat], - classOf[java.util.Map[String,ByteBuffer]], - classOf[java.util.Map[String,ByteBuffer]]) + classOf[java.util.Map[String, ByteBuffer]], + classOf[java.util.Map[String, ByteBuffer]]) println(Count: + casRdd.count) val productSaleRDD = casRdd.map { @@ -118,7 +118,7 @@ object CassandraCQLTest { case (productId, saleCount) = println(productId + : + saleCount) } -val casoutputCF = aggregatedRDD.map { +val casoutputCF = aggregatedRDD.map { case (productId, saleCount) = { val outColFamKey = Map(prod_id - ByteBufferUtil.bytes(productId)) val outKey: java.util.Map[String, ByteBuffer] = outColFamKey http://git-wip-us.apache.org/repos/asf/spark/blob/142ae52d/examples/src/main/scala/org/apache/spark/examples/LocalLR.scala -- diff --git a/examples/src/main/scala/org/apache/spark/examples/LocalLR.scala b/examples/src/main/scala/org/apache/spark/examples/LocalLR.scala index a55e0dc..c3fc74a 100644 --- a/examples/src/main/scala/org/apache/spark/examples/LocalLR.scala +++ b/examples/src/main/scala/org/apache/spark/examples/LocalLR.scala @@ -39,7 +39,7 @@ object LocalLR { def generateData: Array[DataPoint] = { def generatePoint(i: 
Int): DataPoint = { - val y = if(i % 2 == 0) -1 else 1 + val y = if (i % 2 == 0) -1 else 1 val x = DenseVector.fill(D){rand.nextGaussian + y * R} DataPoint(x, y) } http://git-wip-us.apache.org/repos/asf/spark/blob/142ae52d/examples/src/main/scala/org/apache/spark/examples/SparkALS.scala -- diff --git a/examples/src/main/scala/org/apache/spark/examples/SparkALS.scala b/examples/src/main/scala/org/apache/spark/examples/SparkALS.scala index 6c0ac80..30c4261 100644 --- a/examples/src/main/scala/org/apache/spark/examples/SparkALS.scala +++ b/examples/src/main/scala/org/apache/spark/examples/SparkALS.scala @@ -117,7 +117,7 @@ object SparkALS { var us = Array.fill(U)(randomVector(F)) // Iteratively update movies then users -val Rc = sc.broadcast(R) +val Rc = sc.broadcast(R) var
spark git commit: [SPARK-7922] [MLLIB] use DataFrames for user/item factors in ALSModel
Repository: spark Updated Branches: refs/heads/branch-1.4 f7cb272b7 - 68559423a [SPARK-7922] [MLLIB] use DataFrames for user/item factors in ALSModel Expose user/item factors in DataFrames. This is to be more consistent with the pipeline API. It also helps maintain consistent APIs across languages. This PR also removed fitting params from `ALSModel`. coderxiang Author: Xiangrui Meng m...@databricks.com Closes #6468 from mengxr/SPARK-7922 and squashes the following commits: 7bfb1d5 [Xiangrui Meng] update ALSModel in PySpark 1ba5607 [Xiangrui Meng] use DataFrames for user/item factors in ALS (cherry picked from commit db9513789756da4f16bb1fe8cf1d19500f231f54) Signed-off-by: Xiangrui Meng m...@databricks.com Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/68559423 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/68559423 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/68559423 Branch: refs/heads/branch-1.4 Commit: 68559423ac2ffc2c9dfcbe95a8efa4868757c4bf Parents: f7cb272 Author: Xiangrui Meng m...@databricks.com Authored: Thu May 28 22:38:38 2015 -0700 Committer: Xiangrui Meng m...@databricks.com Committed: Thu May 28 22:38:46 2015 -0700 -- .../apache/spark/ml/recommendation/ALS.scala| 101 +++ python/pyspark/ml/recommendation.py | 30 +- python/pyspark/mllib/common.py | 5 +- 3 files changed, 89 insertions(+), 47 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/68559423/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala -- diff --git a/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala b/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala index 900b637..df009d8 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala @@ -35,21 +35,46 @@ import org.apache.spark.annotation.{DeveloperApi, Experimental} import org.apache.spark.ml.{Estimator, Model} import org.apache.spark.ml.param._ import org.apache.spark.ml.param.shared._ -import org.apache.spark.ml.util.Identifiable +import org.apache.spark.ml.util.{Identifiable, SchemaUtils} import org.apache.spark.mllib.optimization.NNLS import org.apache.spark.rdd.RDD import org.apache.spark.sql.DataFrame import org.apache.spark.sql.functions._ -import org.apache.spark.sql.types.{DoubleType, FloatType, IntegerType, StructField, StructType} +import org.apache.spark.sql.types.{DoubleType, FloatType, IntegerType, StructType} import org.apache.spark.storage.StorageLevel import org.apache.spark.util.Utils import org.apache.spark.util.collection.{OpenHashMap, OpenHashSet, SortDataFormat, Sorter} import org.apache.spark.util.random.XORShiftRandom /** + * Common params for ALS and ALSModel. + */ +private[recommendation] trait ALSModelParams extends Params with HasPredictionCol { + /** + * Param for the column name for user ids. + * Default: user + * @group param + */ + val userCol = new Param[String](this, userCol, column name for user ids) + + /** @group getParam */ + def getUserCol: String = $(userCol) + + /** + * Param for the column name for item ids. + * Default: item + * @group param + */ + val itemCol = new Param[String](this, itemCol, column name for item ids) + + /** @group getParam */ + def getItemCol: String = $(itemCol) +} + +/** * Common params for ALS. 
*/ -private[recommendation] trait ALSParams extends Params with HasMaxIter with HasRegParam +private[recommendation] trait ALSParams extends ALSModelParams with HasMaxIter with HasRegParam with HasPredictionCol with HasCheckpointInterval with HasSeed { /** @@ -106,26 +131,6 @@ private[recommendation] trait ALSParams extends Params with HasMaxIter with HasR def getAlpha: Double = $(alpha) /** - * Param for the column name for user ids. - * Default: user - * @group param - */ - val userCol = new Param[String](this, userCol, column name for user ids) - - /** @group getParam */ - def getUserCol: String = $(userCol) - - /** - * Param for the column name for item ids. - * Default: item - * @group param - */ - val itemCol = new Param[String](this, itemCol, column name for item ids) - - /** @group getParam */ - def getItemCol: String = $(itemCol) - - /** * Param for the column name for ratings. * Default: rating * @group param @@ -156,55 +161,60 @@ private[recommendation] trait ALSParams extends Params with HasMaxIter with HasR * @return output schema */ protected def validateAndTransformSchema(schema: StructType): StructType = { -require(schema($(userCol)).dataType == IntegerType) -
spark git commit: [SPARK-7922] [MLLIB] use DataFrames for user/item factors in ALSModel
Repository: spark Updated Branches: refs/heads/master cd3d9a5c0 - db9513789 [SPARK-7922] [MLLIB] use DataFrames for user/item factors in ALSModel Expose user/item factors in DataFrames. This is to be more consistent with the pipeline API. It also helps maintain consistent APIs across languages. This PR also removed fitting params from `ALSModel`. coderxiang Author: Xiangrui Meng m...@databricks.com Closes #6468 from mengxr/SPARK-7922 and squashes the following commits: 7bfb1d5 [Xiangrui Meng] update ALSModel in PySpark 1ba5607 [Xiangrui Meng] use DataFrames for user/item factors in ALS Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/db951378 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/db951378 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/db951378 Branch: refs/heads/master Commit: db9513789756da4f16bb1fe8cf1d19500f231f54 Parents: cd3d9a5 Author: Xiangrui Meng m...@databricks.com Authored: Thu May 28 22:38:38 2015 -0700 Committer: Xiangrui Meng m...@databricks.com Committed: Thu May 28 22:38:38 2015 -0700 -- .../apache/spark/ml/recommendation/ALS.scala| 101 +++ python/pyspark/ml/recommendation.py | 30 +- python/pyspark/mllib/common.py | 5 +- 3 files changed, 89 insertions(+), 47 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/db951378/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala -- diff --git a/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala b/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala index 900b637..df009d8 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala @@ -35,21 +35,46 @@ import org.apache.spark.annotation.{DeveloperApi, Experimental} import org.apache.spark.ml.{Estimator, Model} import org.apache.spark.ml.param._ import org.apache.spark.ml.param.shared._ -import org.apache.spark.ml.util.Identifiable +import org.apache.spark.ml.util.{Identifiable, SchemaUtils} import org.apache.spark.mllib.optimization.NNLS import org.apache.spark.rdd.RDD import org.apache.spark.sql.DataFrame import org.apache.spark.sql.functions._ -import org.apache.spark.sql.types.{DoubleType, FloatType, IntegerType, StructField, StructType} +import org.apache.spark.sql.types.{DoubleType, FloatType, IntegerType, StructType} import org.apache.spark.storage.StorageLevel import org.apache.spark.util.Utils import org.apache.spark.util.collection.{OpenHashMap, OpenHashSet, SortDataFormat, Sorter} import org.apache.spark.util.random.XORShiftRandom /** + * Common params for ALS and ALSModel. + */ +private[recommendation] trait ALSModelParams extends Params with HasPredictionCol { + /** + * Param for the column name for user ids. + * Default: user + * @group param + */ + val userCol = new Param[String](this, userCol, column name for user ids) + + /** @group getParam */ + def getUserCol: String = $(userCol) + + /** + * Param for the column name for item ids. + * Default: item + * @group param + */ + val itemCol = new Param[String](this, itemCol, column name for item ids) + + /** @group getParam */ + def getItemCol: String = $(itemCol) +} + +/** * Common params for ALS. 
*/ -private[recommendation] trait ALSParams extends Params with HasMaxIter with HasRegParam +private[recommendation] trait ALSParams extends ALSModelParams with HasMaxIter with HasRegParam with HasPredictionCol with HasCheckpointInterval with HasSeed { /** @@ -106,26 +131,6 @@ private[recommendation] trait ALSParams extends Params with HasMaxIter with HasR def getAlpha: Double = $(alpha) /** - * Param for the column name for user ids. - * Default: user - * @group param - */ - val userCol = new Param[String](this, userCol, column name for user ids) - - /** @group getParam */ - def getUserCol: String = $(userCol) - - /** - * Param for the column name for item ids. - * Default: item - * @group param - */ - val itemCol = new Param[String](this, itemCol, column name for item ids) - - /** @group getParam */ - def getItemCol: String = $(itemCol) - - /** * Param for the column name for ratings. * Default: rating * @group param @@ -156,55 +161,60 @@ private[recommendation] trait ALSParams extends Params with HasMaxIter with HasR * @return output schema */ protected def validateAndTransformSchema(schema: StructType): StructType = { -require(schema($(userCol)).dataType == IntegerType) -require(schema($(itemCol)).dataType== IntegerType) +SchemaUtils.checkColumnType(schema, $(userCol), IntegerType) +
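(As a usage-level illustration of this change: after the patch, the fitted model exposes its factors as DataFrames rather than RDDs. The sketch below is hedged — the `userFactors`/`itemFactors` accessors and the "id"/"features" columns are inferred from the diff, and the ratings schema is an assumption.)
~~~
import org.apache.spark.ml.recommendation.ALS
import org.apache.spark.sql.DataFrame

// Sketch: train ALS and inspect the factor DataFrames on the fitted model.
// `ratings` is assumed to have integer "user"/"item" columns and a "rating" column.
def trainAndInspect(ratings: DataFrame): Unit = {
  val als = new ALS()
    .setUserCol("user")
    .setItemCol("item")
    .setRatingCol("rating")
    .setRank(10)
  val model = als.fit(ratings)
  model.userFactors.show()  // DataFrame of (id, features) per user
  model.itemFactors.show()  // same schema, per item
}
~~~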
Git Push Summary
Repository: spark Updated Tags: refs/tags/v1.4.0-rc3 [created] 2d97d7a0a
[2/2] spark git commit: Preparing development version 1.4.0-SNAPSHOT
Preparing development version 1.4.0-SNAPSHOT Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/119c93af Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/119c93af Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/119c93af Branch: refs/heads/branch-1.4 Commit: 119c93af9c8c2888465eb2fa5977a074d33594ae Parents: 2d97d7a Author: Patrick Wendell pwend...@gmail.com Authored: Thu May 28 22:57:31 2015 -0700 Committer: Patrick Wendell pwend...@gmail.com Committed: Thu May 28 22:57:31 2015 -0700 -- assembly/pom.xml | 2 +- bagel/pom.xml | 2 +- core/pom.xml | 2 +- examples/pom.xml | 2 +- external/flume-sink/pom.xml | 2 +- external/flume/pom.xml| 2 +- external/kafka-assembly/pom.xml | 2 +- external/kafka/pom.xml| 2 +- external/mqtt/pom.xml | 2 +- external/twitter/pom.xml | 2 +- external/zeromq/pom.xml | 2 +- extras/java8-tests/pom.xml| 2 +- extras/kinesis-asl/pom.xml| 2 +- extras/spark-ganglia-lgpl/pom.xml | 2 +- graphx/pom.xml| 2 +- launcher/pom.xml | 2 +- mllib/pom.xml | 2 +- network/common/pom.xml| 2 +- network/shuffle/pom.xml | 2 +- network/yarn/pom.xml | 2 +- pom.xml | 2 +- repl/pom.xml | 2 +- sql/catalyst/pom.xml | 2 +- sql/core/pom.xml | 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml | 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- unsafe/pom.xml| 2 +- yarn/pom.xml | 2 +- 30 files changed, 30 insertions(+), 30 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/119c93af/assembly/pom.xml -- diff --git a/assembly/pom.xml b/assembly/pom.xml index b8a821d..626c857 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ parent groupIdorg.apache.spark/groupId artifactIdspark-parent_2.10/artifactId -version1.4.0/version +version1.4.0-SNAPSHOT/version relativePath../pom.xml/relativePath /parent http://git-wip-us.apache.org/repos/asf/spark/blob/119c93af/bagel/pom.xml -- diff --git a/bagel/pom.xml b/bagel/pom.xml index c1aa32b..1f3dec9 100644 --- a/bagel/pom.xml +++ b/bagel/pom.xml @@ -21,7 +21,7 @@ parent groupIdorg.apache.spark/groupId artifactIdspark-parent_2.10/artifactId -version1.4.0/version +version1.4.0-SNAPSHOT/version relativePath../pom.xml/relativePath /parent http://git-wip-us.apache.org/repos/asf/spark/blob/119c93af/core/pom.xml -- diff --git a/core/pom.xml b/core/pom.xml index a9b8b42..e58efe4 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -21,7 +21,7 @@ parent groupIdorg.apache.spark/groupId artifactIdspark-parent_2.10/artifactId -version1.4.0/version +version1.4.0-SNAPSHOT/version relativePath../pom.xml/relativePath /parent http://git-wip-us.apache.org/repos/asf/spark/blob/119c93af/examples/pom.xml -- diff --git a/examples/pom.xml b/examples/pom.xml index 38ff67d..e4efee7 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -21,7 +21,7 @@ parent groupIdorg.apache.spark/groupId artifactIdspark-parent_2.10/artifactId -version1.4.0/version +version1.4.0-SNAPSHOT/version relativePath../pom.xml/relativePath /parent http://git-wip-us.apache.org/repos/asf/spark/blob/119c93af/external/flume-sink/pom.xml -- diff --git a/external/flume-sink/pom.xml b/external/flume-sink/pom.xml index e8784eb..1f3e619 100644 --- a/external/flume-sink/pom.xml +++ b/external/flume-sink/pom.xml @@ -21,7 +21,7 @@ parent groupIdorg.apache.spark/groupId artifactIdspark-parent_2.10/artifactId -version1.4.0/version +version1.4.0-SNAPSHOT/version relativePath../../pom.xml/relativePath /parent http://git-wip-us.apache.org/repos/asf/spark/blob/119c93af/external/flume/pom.xml -- diff --git 
a/external/flume/pom.xml b/external/flume/pom.xml index 1794f3e..8df7edb 100644 --- a/external/flume/pom.xml +++ b/external/flume/pom.xml @@ -21,7 +21,7 @@ <parent> <groupId>org.apache.spark</groupId> <artifactId>spark-parent_2.10</artifactId> -<version>1.4.0</version> +
spark git commit: [SPARK-7932] Fix misleading scheduler delay visualization
Repository: spark Updated Branches: refs/heads/branch-1.4 1d49d8c3f - aee046dfa [SPARK-7932] Fix misleading scheduler delay visualization The existing code rounds down to the nearest percent when computing the proportion of a task's time that was spent on each phase of execution, and then computes the scheduler delay proportion as 100 - sum(all other proportions). As a result, a few extra percent can end up in the scheduler delay. This commit eliminates the rounding so that the time visualizations correspond properly to the real times. sarutak If you could take a look at this, that would be great! Not sure if there's a good reason to round here that I missed. cc shivaram Author: Kay Ousterhout kayousterh...@gmail.com Closes #6484 from kayousterhout/SPARK-7932 and squashes the following commits: 1723cc4 [Kay Ousterhout] [SPARK-7932] Fix misleading scheduler delay visualization (cherry picked from commit 04ddcd4db7801abefa9c9effe5d88413b29d713b) Signed-off-by: Kay Ousterhout kayousterh...@gmail.com Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/aee046df Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/aee046df Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/aee046df Branch: refs/heads/branch-1.4 Commit: aee046dfa111b4323edd5f4ccb36075449492952 Parents: 1d49d8c Author: Kay Ousterhout kayousterh...@gmail.com Authored: Thu May 28 22:09:49 2015 -0700 Committer: Kay Ousterhout kayousterh...@gmail.com Committed: Thu May 28 22:09:59 2015 -0700 -- core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/aee046df/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala -- diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala b/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala index 31e2e7f..b83a49f 100644 --- a/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala +++ b/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala @@ -527,7 +527,7 @@ private[ui] class StagePage(parent: StagesTab) extends WebUIPage(stage) { minLaunchTime = launchTime.min(minLaunchTime) maxFinishTime = finishTime.max(maxFinishTime) -def toProportion(time: Long) = (time.toDouble / totalExecutionTime * 100).toLong +def toProportion(time: Long) = time.toDouble / totalExecutionTime * 100 val metricsOpt = taskUIData.taskMetrics val shuffleReadTime = - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-7932] Fix misleading scheduler delay visualization
Repository: spark Updated Branches: refs/heads/master 834e69952 -> 04ddcd4db [SPARK-7932] Fix misleading scheduler delay visualization The existing code rounds down to the nearest percent when computing the proportion of a task's time that was spent on each phase of execution, and then computes the scheduler delay proportion as 100 - sum(all other proportions). As a result, a few extra percent can end up in the scheduler delay. This commit eliminates the rounding so that the time visualizations correspond properly to the real times. sarutak If you could take a look at this, that would be great! Not sure if there's a good reason to round here that I missed. cc shivaram Author: Kay Ousterhout kayousterh...@gmail.com Closes #6484 from kayousterhout/SPARK-7932 and squashes the following commits: 1723cc4 [Kay Ousterhout] [SPARK-7932] Fix misleading scheduler delay visualization Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/04ddcd4d Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/04ddcd4d Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/04ddcd4d Branch: refs/heads/master Commit: 04ddcd4db7801abefa9c9effe5d88413b29d713b Parents: 834e699 Author: Kay Ousterhout kayousterh...@gmail.com Authored: Thu May 28 22:09:49 2015 -0700 Committer: Kay Ousterhout kayousterh...@gmail.com Committed: Thu May 28 22:09:49 2015 -0700 -- core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/04ddcd4d/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala -- diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala b/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala index 31e2e7f..b83a49f 100644 --- a/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala +++ b/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala @@ -527,7 +527,7 @@ private[ui] class StagePage(parent: StagesTab) extends WebUIPage("stage") { minLaunchTime = launchTime.min(minLaunchTime) maxFinishTime = finishTime.max(maxFinishTime) -def toProportion(time: Long) = (time.toDouble / totalExecutionTime * 100).toLong +def toProportion(time: Long) = time.toDouble / totalExecutionTime * 100 val metricsOpt = taskUIData.taskMetrics val shuffleReadTime =
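(The effect of the old flooring is easy to see with made-up numbers. A small sketch — the values are hypothetical, purely for illustration:)
~~~
// Hypothetical 7 ms task with six 1 ms phases; the remaining 1 ms is scheduler delay.
val totalExecutionTime = 7L
val phaseTimes = Seq(1L, 1L, 1L, 1L, 1L, 1L)

// Old behavior: floor each proportion to a whole percent.
val floored = phaseTimes.map(t => (t.toDouble / totalExecutionTime * 100).toLong) // 14 each
val oldDelay = 100 - floored.sum  // 100 - 84 = 16 (%), overstating the real delay

// New behavior: keep fractional percentages.
val exact = phaseTimes.map(_.toDouble / totalExecutionTime * 100)  // ~14.29 each
val newDelay = 100 - exact.sum    // ~14.29 (%), matching the real 1/7 of the task
~~~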
spark git commit: [SPARK-7930] [CORE] [STREAMING] Fixed shutdown hook priorities
Repository: spark Updated Branches: refs/heads/master 04ddcd4db -> cd3d9a5c0 [SPARK-7930] [CORE] [STREAMING] Fixed shutdown hook priorities The shutdown hook for temp directories had priority 100 while SparkContext's was 50, so the local root directory was deleted before the SparkContext was shut down. This leads to scary errors from running jobs at the time of shutdown, and is especially a problem when running streaming examples, where Ctrl-C is the only way to shut down. The fix in this PR makes the temp directory shutdown priority lower than SparkContext's, so that the temp dirs are the last thing to get deleted, after the SparkContext has been shut down. Also, the DiskBlockManager shutdown priority is changed from the default 100 to temp_dir_prio + 1, so that it is invoked just before all temp dirs are cleared. Author: Tathagata Das tathagata.das1...@gmail.com Closes #6482 from tdas/SPARK-7930 and squashes the following commits: d7cbeb5 [Tathagata Das] Removed unnecessary line 1514d0b [Tathagata Das] Fixed shutdown hook priorities Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/cd3d9a5c Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/cd3d9a5c Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/cd3d9a5c Branch: refs/heads/master Commit: cd3d9a5c0c3e77098a72c85dffe4a27737009ae7 Parents: 04ddcd4 Author: Tathagata Das tathagata.das1...@gmail.com Authored: Thu May 28 22:28:13 2015 -0700 Committer: Patrick Wendell patr...@databricks.com Committed: Thu May 28 22:28:13 2015 -0700 -- .../org/apache/spark/storage/DiskBlockManager.scala | 4 ++-- core/src/main/scala/org/apache/spark/util/Utils.scala | 12 ++-- 2 files changed, 12 insertions(+), 4 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/cd3d9a5c/core/src/main/scala/org/apache/spark/storage/DiskBlockManager.scala -- diff --git a/core/src/main/scala/org/apache/spark/storage/DiskBlockManager.scala b/core/src/main/scala/org/apache/spark/storage/DiskBlockManager.scala index 2a44477..d441a4d 100644 --- a/core/src/main/scala/org/apache/spark/storage/DiskBlockManager.scala +++ b/core/src/main/scala/org/apache/spark/storage/DiskBlockManager.scala @@ -139,8 +139,8 @@ private[spark] class DiskBlockManager(blockManager: BlockManager, conf: SparkCon } private def addShutdownHook(): AnyRef = { -Utils.addShutdownHook { () => - logDebug("Shutdown hook called") +Utils.addShutdownHook(Utils.TEMP_DIR_SHUTDOWN_PRIORITY + 1) { () => + logInfo("Shutdown hook called") DiskBlockManager.this.doStop() } } http://git-wip-us.apache.org/repos/asf/spark/blob/cd3d9a5c/core/src/main/scala/org/apache/spark/util/Utils.scala -- diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala index 763d4db..693e1a0 100644 --- a/core/src/main/scala/org/apache/spark/util/Utils.scala +++ b/core/src/main/scala/org/apache/spark/util/Utils.scala @@ -73,6 +73,13 @@ private[spark] object Utils extends Logging { */ val SPARK_CONTEXT_SHUTDOWN_PRIORITY = 50 + /** + * The shutdown priority of temp directory must be lower than the SparkContext shutdown + * priority. Otherwise cleaning the temp directories while Spark jobs are running can + * throw undesirable errors at the time of shutdown.
+ */ + val TEMP_DIR_SHUTDOWN_PRIORITY = 25 + private val MAX_DIR_CREATION_ATTEMPTS: Int = 10 @volatile private var localRootDirs: Array[String] = null @@ -189,10 +196,11 @@ private[spark] object Utils extends Logging { private val shutdownDeleteTachyonPaths = new scala.collection.mutable.HashSet[String]() // Add a shutdown hook to delete the temp dirs when the JVM exits - addShutdownHook { () => -logDebug("Shutdown hook called") + addShutdownHook(TEMP_DIR_SHUTDOWN_PRIORITY) { () => +logInfo("Shutdown hook called") shutdownDeletePaths.foreach { dirPath => try { +logInfo("Deleting directory " + dirPath) Utils.deleteRecursively(new File(dirPath)) } catch { case e: Exception => logError(s"Exception while deleting Spark temp dir: $dirPath", e)
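(The mechanism these priority constants feed into can be sketched on its own: hooks are registered with a priority and run highest-first from a single JVM shutdown hook. This is only an illustrative reimplementation of the idea — names and structure are assumed, not Spark's actual Utils internals.)
~~~
import scala.collection.mutable

// Illustrative only: priority-ordered shutdown hooks. Higher priority runs earlier,
// so a SparkContext hook at 50 fires before temp-dir cleanup at 25.
object PriorityShutdownHooks {
  private val hooks = mutable.ArrayBuffer.empty[(Int, () => Unit)]

  def add(priority: Int)(body: => Unit): Unit = synchronized {
    hooks += ((priority, () => body))
  }

  Runtime.getRuntime.addShutdownHook(new Thread {
    override def run(): Unit = PriorityShutdownHooks.synchronized {
      hooks.sortBy(-_._1).foreach { case (_, body) => body() } // descending priority
    }
  })
}

// Usage sketch:
// PriorityShutdownHooks.add(50) { /* stop SparkContext */ }
// PriorityShutdownHooks.add(25) { /* delete temp dirs */ }
~~~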
spark git commit: [HOTFIX] Minor style fix from last commit
Repository: spark Updated Branches: refs/heads/master e714ecf27 -> 36067ce39 [HOTFIX] Minor style fix from last commit Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/36067ce3 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/36067ce3 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/36067ce3 Branch: refs/heads/master Commit: 36067ce398e2949c2f122625e67fd5497febdee6 Parents: e714ecf Author: Patrick Wendell patr...@databricks.com Authored: Thu May 28 22:48:02 2015 -0700 Committer: Patrick Wendell patr...@databricks.com Committed: Thu May 28 22:48:02 2015 -0700 -- .../org/apache/spark/streaming/dstream/SocketInputDStream.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/36067ce3/streaming/src/main/scala/org/apache/spark/streaming/dstream/SocketInputDStream.scala -- diff --git a/streaming/src/main/scala/org/apache/spark/streaming/dstream/SocketInputDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/SocketInputDStream.scala index 96e0a9c..5ce5b7a 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/dstream/SocketInputDStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/SocketInputDStream.scala @@ -78,8 +78,9 @@ class SocketReceiver[T: ClassTag]( } if (!isStopped()) { restart("Socket data stream had no more data") + } else { +logInfo("Stopped receiving") } - logInfo("Stopped receiving") } catch { case e: java.net.ConnectException => restart("Error connecting to " + host + ":" + port, e)
[1/2] spark git commit: [SPARK-7931] [STREAMING] Do not restart receiver when stopped
Repository: spark Updated Branches: refs/heads/branch-1.4 68559423a -> e419821c3 [SPARK-7931] [STREAMING] Do not restart receiver when stopped Attempts to restart the socket receiver when it is supposed to be stopped cause undesirable error messages. Author: Tathagata Das tathagata.das1...@gmail.com Closes #6483 from tdas/SPARK-7931 and squashes the following commits: 09aeee1 [Tathagata Das] Do not restart receiver when stopped Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7a52fdf2 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7a52fdf2 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7a52fdf2 Branch: refs/heads/branch-1.4 Commit: 7a52fdf25f8d635ba05796abb0c491454d7869cf Parents: 6855942 Author: Tathagata Das tathagata.das1...@gmail.com Authored: Thu May 28 22:39:21 2015 -0700 Committer: Patrick Wendell patr...@databricks.com Committed: Thu May 28 22:48:23 2015 -0700 -- .../spark/streaming/dstream/SocketInputDStream.scala | 11 --- 1 file changed, 8 insertions(+), 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/7a52fdf2/streaming/src/main/scala/org/apache/spark/streaming/dstream/SocketInputDStream.scala -- diff --git a/streaming/src/main/scala/org/apache/spark/streaming/dstream/SocketInputDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/SocketInputDStream.scala index 8b72bcf..96e0a9c 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/dstream/SocketInputDStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/SocketInputDStream.scala @@ -17,6 +17,8 @@ package org.apache.spark.streaming.dstream +import scala.util.control.NonFatal + import org.apache.spark.streaming.StreamingContext import org.apache.spark.storage.StorageLevel import org.apache.spark.util.NextIterator @@ -74,13 +76,16 @@ class SocketReceiver[T: ClassTag]( while(!isStopped && iterator.hasNext) { store(iterator.next) } + if (!isStopped()) { +restart("Socket data stream had no more data") + } logInfo("Stopped receiving") - restart("Retrying connecting to " + host + ":" + port) } catch { case e: java.net.ConnectException => restart("Error connecting to " + host + ":" + port, e) - case t: Throwable => -restart("Error receiving data", t) + case NonFatal(e) => +logWarning("Error receiving data", e) +restart("Error receiving data", e) } finally { if (socket != null) { socket.close()
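(The control flow after this fix is worth seeing in isolation. Below is a simplified sketch of the pattern only — the function parameters stand in for the real Receiver API methods of the same names; this is not the actual SocketReceiver code.)
~~~
import scala.util.control.NonFatal

// Sketch of the fixed receive loop: restart only when the stream ended unexpectedly,
// retry on non-fatal errors, and let fatal Throwables (e.g. OutOfMemoryError) propagate.
def receiveLoop(
    data: Iterator[String],
    isStopped: () => Boolean,
    store: String => Unit,
    restart: String => Unit): Unit = {
  try {
    while (!isStopped() && data.hasNext) {
      store(data.next())
    }
    if (!isStopped()) {
      restart("Socket data stream had no more data") // unexpected EOF: reconnect
    }
    // if isStopped(), the receiver was deliberately stopped: do NOT restart
  } catch {
    case NonFatal(e) =>
      restart("Error receiving data: " + e.getMessage)
    // fatal errors are intentionally not caught here
  }
}
~~~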
spark git commit: [MINOR] fix RegressionEvaluator doc
Repository: spark Updated Branches: refs/heads/master c45d58c14 -> 834e69952 [MINOR] fix RegressionEvaluator doc `make clean html` under `python/doc` returns ~~~ /Users/meng/src/spark/python/pyspark/ml/evaluation.py:docstring of pyspark.ml.evaluation.RegressionEvaluator.setParams:3: WARNING: Definition list ends without a blank line; unexpected unindent. ~~~ harsha2010 Author: Xiangrui Meng m...@databricks.com Closes #6469 from mengxr/fix-regression-evaluator-doc and squashes the following commits: 91e2dad [Xiangrui Meng] fix RegressionEvaluator doc Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/834e6995 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/834e6995 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/834e6995 Branch: refs/heads/master Commit: 834e699524583a7ebfe9e83b3900ec503150deca Parents: c45d58c Author: Xiangrui Meng m...@databricks.com Authored: Thu May 28 21:26:43 2015 -0700 Committer: Xiangrui Meng m...@databricks.com Committed: Thu May 28 21:26:43 2015 -0700 -- python/pyspark/ml/evaluation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/834e6995/python/pyspark/ml/evaluation.py -- diff --git a/python/pyspark/ml/evaluation.py b/python/pyspark/ml/evaluation.py index 23c3716..d8ddb78 100644 --- a/python/pyspark/ml/evaluation.py +++ b/python/pyspark/ml/evaluation.py @@ -205,7 +205,7 @@ class RegressionEvaluator(JavaEvaluator, HasLabelCol, HasPredictionCol): def setParams(self, predictionCol="prediction", labelCol="label", metricName="rmse"): -setParams(self, predictionCol="prediction", labelCol="label", +setParams(self, predictionCol="prediction", labelCol="label", \ metricName="rmse") Sets params for regression evaluator.
spark git commit: [MINOR] fix RegressionEvaluator doc
Repository: spark Updated Branches: refs/heads/branch-1.4 6e99dd5d0 -> 1d49d8c3f [MINOR] fix RegressionEvaluator doc `make clean html` under `python/doc` returns ~~~ /Users/meng/src/spark/python/pyspark/ml/evaluation.py:docstring of pyspark.ml.evaluation.RegressionEvaluator.setParams:3: WARNING: Definition list ends without a blank line; unexpected unindent. ~~~ harsha2010 Author: Xiangrui Meng m...@databricks.com Closes #6469 from mengxr/fix-regression-evaluator-doc and squashes the following commits: 91e2dad [Xiangrui Meng] fix RegressionEvaluator doc (cherry picked from commit 834e699524583a7ebfe9e83b3900ec503150deca) Signed-off-by: Xiangrui Meng m...@databricks.com Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1d49d8c3 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1d49d8c3 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1d49d8c3 Branch: refs/heads/branch-1.4 Commit: 1d49d8c3fd297f7a6269693fbec623ddec96b279 Parents: 6e99dd5 Author: Xiangrui Meng m...@databricks.com Authored: Thu May 28 21:26:43 2015 -0700 Committer: Xiangrui Meng m...@databricks.com Committed: Thu May 28 21:26:49 2015 -0700 -- python/pyspark/ml/evaluation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/1d49d8c3/python/pyspark/ml/evaluation.py -- diff --git a/python/pyspark/ml/evaluation.py b/python/pyspark/ml/evaluation.py index 23c3716..d8ddb78 100644 --- a/python/pyspark/ml/evaluation.py +++ b/python/pyspark/ml/evaluation.py @@ -205,7 +205,7 @@ class RegressionEvaluator(JavaEvaluator, HasLabelCol, HasPredictionCol): def setParams(self, predictionCol="prediction", labelCol="label", metricName="rmse"): -setParams(self, predictionCol="prediction", labelCol="label", +setParams(self, predictionCol="prediction", labelCol="label", \ metricName="rmse") Sets params for regression evaluator.
spark git commit: [SPARK-7926] [PYSPARK] use the official Pyrolite release
Repository: spark
Updated Branches:
  refs/heads/master b069ad23d -> c45d58c14

[SPARK-7926] [PYSPARK] use the official Pyrolite release

Switch to the official Pyrolite release from the one published under `org.spark-project`. Thanks irmen for making the releases on Maven Central. We didn't upgrade to 4.6 because we don't have enough time for QA. I excluded `serpent` from its dependencies because we don't use it in Spark.
~~~
[info]   +-net.jpountz.lz4:lz4:1.3.0
[info]   +-net.razorvine:pyrolite:4.4
[info]   +-net.sf.py4j:py4j:0.8.2.1
~~~

davies

Author: Xiangrui Meng <m...@databricks.com>

Closes #6472 from mengxr/SPARK-7926 and squashes the following commits:

7b3c6bf [Xiangrui Meng] use the official Pyrolite release

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c45d58c1
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c45d58c1
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c45d58c1

Branch: refs/heads/master
Commit: c45d58c143d68cb807186acc9d060daa8549dd5c
Parents: b069ad2
Author: Xiangrui Meng <m...@databricks.com>
Authored: Thu May 28 21:20:54 2015 -0700
Committer: Xiangrui Meng <m...@databricks.com>
Committed: Thu May 28 21:20:54 2015 -0700

--
 core/pom.xml | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/c45d58c1/core/pom.xml
--
diff --git a/core/pom.xml b/core/pom.xml
index bfa49d0..e58efe4 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -377,9 +377,15 @@
       <scope>test</scope>
     </dependency>
     <dependency>
-      <groupId>org.spark-project</groupId>
+      <groupId>net.razorvine</groupId>
       <artifactId>pyrolite</artifactId>
       <version>4.4</version>
+      <exclusions>
+        <exclusion>
+          <groupId>net.razorvine</groupId>
+          <artifactId>serpent</artifactId>
+        </exclusion>
+      </exclusions>
     </dependency>
     <dependency>
       <groupId>net.sf.py4j</groupId>
spark git commit: [SPARK-7926] [PYSPARK] use the official Pyrolite release
Repository: spark
Updated Branches:
  refs/heads/branch-1.4 b3a590061 -> 6e99dd5d0

[SPARK-7926] [PYSPARK] use the official Pyrolite release

Switch to the official Pyrolite release from the one published under `org.spark-project`. Thanks irmen for making the releases on Maven Central. We didn't upgrade to 4.6 because we don't have enough time for QA. I excluded `serpent` from its dependencies because we don't use it in Spark.
~~~
[info]   +-net.jpountz.lz4:lz4:1.3.0
[info]   +-net.razorvine:pyrolite:4.4
[info]   +-net.sf.py4j:py4j:0.8.2.1
~~~

davies

Author: Xiangrui Meng <m...@databricks.com>

Closes #6472 from mengxr/SPARK-7926 and squashes the following commits:

7b3c6bf [Xiangrui Meng] use the official Pyrolite release

(cherry picked from commit c45d58c143d68cb807186acc9d060daa8549dd5c)
Signed-off-by: Xiangrui Meng <m...@databricks.com>

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/6e99dd5d
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/6e99dd5d
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/6e99dd5d

Branch: refs/heads/branch-1.4
Commit: 6e99dd5d042e8a3e49937769a846bef8a66214f8
Parents: b3a5900
Author: Xiangrui Meng <m...@databricks.com>
Authored: Thu May 28 21:20:54 2015 -0700
Committer: Xiangrui Meng <m...@databricks.com>
Committed: Thu May 28 21:21:01 2015 -0700

--
 core/pom.xml | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/6e99dd5d/core/pom.xml
--
diff --git a/core/pom.xml b/core/pom.xml
index bfa49d0..e58efe4 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -377,9 +377,15 @@
       <scope>test</scope>
     </dependency>
     <dependency>
-      <groupId>org.spark-project</groupId>
+      <groupId>net.razorvine</groupId>
       <artifactId>pyrolite</artifactId>
       <version>4.4</version>
+      <exclusions>
+        <exclusion>
+          <groupId>net.razorvine</groupId>
+          <artifactId>serpent</artifactId>
+        </exclusion>
+      </exclusions>
     </dependency>
     <dependency>
       <groupId>net.sf.py4j</groupId>
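For projects that pull Spark core in through sbt rather than Maven, the same swap-plus-exclusion can be expressed as a one-liner. This is a sketch of the equivalent build.sbt fragment, not part of the patch itself.
~~~
// build.sbt fragment (sbt build definitions are Scala): depend on the
// official Pyrolite artifact while keeping serpent off the classpath,
// mirroring the <exclusions> block added to core/pom.xml above.
libraryDependencies += "net.razorvine" % "pyrolite" % "4.4" exclude("net.razorvine", "serpent")
~~~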
spark git commit: [SPARK-7930] [CORE] [STREAMING] Fixed shutdown hook priorities
Repository: spark
Updated Branches:
  refs/heads/branch-1.4 aee046dfa -> f7cb272b7

[SPARK-7930] [CORE] [STREAMING] Fixed shutdown hook priorities

The shutdown hook for temp directories had priority 100, while SparkContext's was 50, so the local root directory was deleted before SparkContext was shut down. This leads to scary errors in running jobs at the time of shutdown, and is especially a problem when running streaming examples, where Ctrl-C is the only way to shut down.

The fix in this PR is to make the temp directory shutdown priority lower than SparkContext's, so that the temp dirs are the last thing to get deleted, after the SparkContext has been shut down. Also, the DiskBlockManager shutdown priority is changed from the default 100 to temp_dir_prio + 1, so that it gets invoked just before all temp dirs are cleared.

Author: Tathagata Das <tathagata.das1...@gmail.com>

Closes #6482 from tdas/SPARK-7930 and squashes the following commits:

d7cbeb5 [Tathagata Das] Removed unnecessary line
1514d0b [Tathagata Das] Fixed shutdown hook priorities

(cherry picked from commit cd3d9a5c0c3e77098a72c85dffe4a27737009ae7)
Signed-off-by: Patrick Wendell <patr...@databricks.com>

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f7cb272b
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f7cb272b
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f7cb272b

Branch: refs/heads/branch-1.4
Commit: f7cb272b7c77de42681287925922d41248efca46
Parents: aee046d
Author: Tathagata Das <tathagata.das1...@gmail.com>
Authored: Thu May 28 22:28:13 2015 -0700
Committer: Patrick Wendell <patr...@databricks.com>
Committed: Thu May 28 22:28:31 2015 -0700

--
 .../org/apache/spark/storage/DiskBlockManager.scala   |  4 ++--
 core/src/main/scala/org/apache/spark/util/Utils.scala | 12 ++++++++++--
 2 files changed, 12 insertions(+), 4 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/f7cb272b/core/src/main/scala/org/apache/spark/storage/DiskBlockManager.scala
--
diff --git a/core/src/main/scala/org/apache/spark/storage/DiskBlockManager.scala b/core/src/main/scala/org/apache/spark/storage/DiskBlockManager.scala
index 2a44477..d441a4d 100644
--- a/core/src/main/scala/org/apache/spark/storage/DiskBlockManager.scala
+++ b/core/src/main/scala/org/apache/spark/storage/DiskBlockManager.scala
@@ -139,8 +139,8 @@ private[spark] class DiskBlockManager(blockManager: BlockManager, conf: SparkConf)
   }

   private def addShutdownHook(): AnyRef = {
-    Utils.addShutdownHook { () =>
-      logDebug("Shutdown hook called")
+    Utils.addShutdownHook(Utils.TEMP_DIR_SHUTDOWN_PRIORITY + 1) { () =>
+      logInfo("Shutdown hook called")
       DiskBlockManager.this.doStop()
     }
   }

http://git-wip-us.apache.org/repos/asf/spark/blob/f7cb272b/core/src/main/scala/org/apache/spark/util/Utils.scala
--
diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala
index 763d4db..693e1a0 100644
--- a/core/src/main/scala/org/apache/spark/util/Utils.scala
+++ b/core/src/main/scala/org/apache/spark/util/Utils.scala
@@ -73,6 +73,13 @@ private[spark] object Utils extends Logging {
    */
   val SPARK_CONTEXT_SHUTDOWN_PRIORITY = 50

+  /**
+   * The shutdown priority of temp directory must be lower than the SparkContext shutdown
+   * priority. Otherwise cleaning the temp directories while Spark jobs are running can
+   * throw undesirable errors at the time of shutdown.
+   */
+  val TEMP_DIR_SHUTDOWN_PRIORITY = 25
+
   private val MAX_DIR_CREATION_ATTEMPTS: Int = 10
   @volatile private var localRootDirs: Array[String] = null

@@ -189,10 +196,11 @@ private[spark] object Utils extends Logging {
   private val shutdownDeleteTachyonPaths = new scala.collection.mutable.HashSet[String]()

   // Add a shutdown hook to delete the temp dirs when the JVM exits
-  addShutdownHook { () =>
-    logDebug("Shutdown hook called")
+  addShutdownHook(TEMP_DIR_SHUTDOWN_PRIORITY) { () =>
+    logInfo("Shutdown hook called")
     shutdownDeletePaths.foreach { dirPath =>
       try {
+        logInfo("Deleting directory " + dirPath)
         Utils.deleteRecursively(new File(dirPath))
       } catch {
         case e: Exception => logError(s"Exception while deleting Spark temp dir: $dirPath", e)
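The invariant the patch relies on is that hooks with higher priority values run earlier: the SparkContext hook (50) fires before the DiskBlockManager hook (25 + 1), which fires before the temp-directory hook (25). The following self-contained sketch shows that ordering discipline in miniature; `HookEntry` and `registerHook` are illustrative names, not Spark's internals.
~~~
import java.util.concurrent.PriorityBlockingQueue

// Minimal sketch of priority-ordered shutdown hooks, assuming (as in this
// commit) that hooks with HIGHER priority values run EARLIER.
object ShutdownSketch {
  case class HookEntry(priority: Int, body: () => Unit) extends Comparable[HookEntry] {
    // Reverse the natural order so that higher priorities are polled first.
    override def compareTo(other: HookEntry): Int = Integer.compare(other.priority, priority)
  }

  private val hooks = new PriorityBlockingQueue[HookEntry]()

  def registerHook(priority: Int)(body: () => Unit): Unit = hooks.add(HookEntry(priority, body))

  // A single JVM hook drains the queue in decreasing-priority order.
  Runtime.getRuntime.addShutdownHook(new Thread {
    override def run(): Unit = {
      var entry = hooks.poll()
      while (entry != null) {
        entry.body()          // e.g. stop SparkContext at 50, delete temp dirs at 25
        entry = hooks.poll()
      }
    }
  })

  def main(args: Array[String]): Unit = {
    registerHook(25)(() => println("deleting temp dirs (runs last)"))
    registerHook(50)(() => println("stopping SparkContext (runs first)"))
  }
}
~~~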
spark git commit: [SPARK-7931] [STREAMING] Do not restart receiver when stopped
Repository: spark
Updated Branches:
  refs/heads/master db9513789 -> e714ecf27

[SPARK-7931] [STREAMING] Do not restart receiver when stopped

Attempting to restart the socket receiver when it is supposed to be stopped causes undesirable error messages.

Author: Tathagata Das <tathagata.das1...@gmail.com>

Closes #6483 from tdas/SPARK-7931 and squashes the following commits:

09aeee1 [Tathagata Das] Do not restart receiver when stopped

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e714ecf2
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e714ecf2
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e714ecf2

Branch: refs/heads/master
Commit: e714ecf277a7412ea8263662977fe3ad1f794975
Parents: db95137
Author: Tathagata Das <tathagata.das1...@gmail.com>
Authored: Thu May 28 22:39:21 2015 -0700
Committer: Patrick Wendell <patr...@databricks.com>
Committed: Thu May 28 22:39:25 2015 -0700

--
 .../spark/streaming/dstream/SocketInputDStream.scala | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/e714ecf2/streaming/src/main/scala/org/apache/spark/streaming/dstream/SocketInputDStream.scala
--
diff --git a/streaming/src/main/scala/org/apache/spark/streaming/dstream/SocketInputDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/SocketInputDStream.scala
index 8b72bcf..96e0a9c 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/dstream/SocketInputDStream.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/SocketInputDStream.scala
@@ -17,6 +17,8 @@

 package org.apache.spark.streaming.dstream

+import scala.util.control.NonFatal
+
 import org.apache.spark.streaming.StreamingContext
 import org.apache.spark.storage.StorageLevel
 import org.apache.spark.util.NextIterator

@@ -74,13 +76,16 @@ class SocketReceiver[T: ClassTag](
       while(!isStopped && iterator.hasNext) {
         store(iterator.next)
       }
+      if (!isStopped()) {
+        restart("Socket data stream had no more data")
+      }
       logInfo("Stopped receiving")
-      restart("Retrying connecting to " + host + ":" + port)
     } catch {
       case e: java.net.ConnectException =>
         restart("Error connecting to " + host + ":" + port, e)
-      case t: Throwable =>
-        restart("Error receiving data", t)
+      case NonFatal(e) =>
+        logWarning("Error receiving data", e)
+        restart("Error receiving data", e)
     } finally {
       if (socket != null) {
         socket.close()
[2/2] spark git commit: [HOTFIX] Minor style fix from last commit
[HOTFIX] Minor style fix from last commit

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e419821c
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e419821c
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e419821c

Branch: refs/heads/branch-1.4
Commit: e419821c3b10e59e9765c6d41d80694772e5c772
Parents: 7a52fdf
Author: Patrick Wendell <patr...@databricks.com>
Authored: Thu May 28 22:48:02 2015 -0700
Committer: Patrick Wendell <patr...@databricks.com>
Committed: Thu May 28 22:48:25 2015 -0700

--
 .../org/apache/spark/streaming/dstream/SocketInputDStream.scala | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/e419821c/streaming/src/main/scala/org/apache/spark/streaming/dstream/SocketInputDStream.scala
--
diff --git a/streaming/src/main/scala/org/apache/spark/streaming/dstream/SocketInputDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/SocketInputDStream.scala
index 96e0a9c..5ce5b7a 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/dstream/SocketInputDStream.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/SocketInputDStream.scala
@@ -78,8 +78,9 @@ class SocketReceiver[T: ClassTag](
       }
       if (!isStopped()) {
         restart("Socket data stream had no more data")
+      } else {
+        logInfo("Stopped receiving")
       }
-      logInfo("Stopped receiving")
     } catch {
       case e: java.net.ConnectException =>
         restart("Error connecting to " + host + ":" + port, e)
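Taken together, the two commits above pin down a small contract for the receive loop: a deliberate stop exits quietly, while any other way out of the loop triggers a restart. Below is a self-contained sketch of that contract; the stubs stand in for Spark's `Receiver` API, and only the control flow is the point.
~~~
import scala.util.control.NonFatal

object ReceiverLoopSketch {
  @volatile var stopped = false
  def isStopped(): Boolean = stopped
  def store(record: String): Unit = println(s"stored: $record")
  def restart(reason: String, e: Throwable = null): Unit = println(s"restart: $reason")
  def logInfo(msg: String): Unit = println(msg)

  def receive(records: Iterator[String]): Unit = {
    try {
      while (!isStopped() && records.hasNext) {
        store(records.next())
      }
      if (!isStopped()) {
        restart("Socket data stream had no more data") // transient end-of-stream: reconnect
      } else {
        logInfo("Stopped receiving")                   // deliberate stop: exit quietly
      }
    } catch {
      case NonFatal(e) => restart("Error receiving data", e)
    }
  }

  def main(args: Array[String]): Unit = receive(Iterator("a", "b", "c"))
}
~~~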
[1/2] spark git commit: Preparing Spark release v1.4.0-rc3
Repository: spark
Updated Branches:
  refs/heads/branch-1.4 e419821c3 -> 119c93af9

Preparing Spark release v1.4.0-rc3

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2d97d7a0
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2d97d7a0
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2d97d7a0

Branch: refs/heads/branch-1.4
Commit: 2d97d7a0aa5740aacdb90ef646175770b7610c58
Parents: e419821
Author: Patrick Wendell <pwend...@gmail.com>
Authored: Thu May 28 22:57:26 2015 -0700
Committer: Patrick Wendell <pwend...@gmail.com>
Committed: Thu May 28 22:57:26 2015 -0700

--
 assembly/pom.xml                  | 2 +-
 bagel/pom.xml                     | 2 +-
 core/pom.xml                      | 2 +-
 examples/pom.xml                  | 2 +-
 external/flume-sink/pom.xml       | 2 +-
 external/flume/pom.xml            | 2 +-
 external/kafka-assembly/pom.xml   | 2 +-
 external/kafka/pom.xml            | 2 +-
 external/mqtt/pom.xml             | 2 +-
 external/twitter/pom.xml          | 2 +-
 external/zeromq/pom.xml           | 2 +-
 extras/java8-tests/pom.xml        | 2 +-
 extras/kinesis-asl/pom.xml        | 2 +-
 extras/spark-ganglia-lgpl/pom.xml | 2 +-
 graphx/pom.xml                    | 2 +-
 launcher/pom.xml                  | 2 +-
 mllib/pom.xml                     | 2 +-
 network/common/pom.xml            | 2 +-
 network/shuffle/pom.xml           | 2 +-
 network/yarn/pom.xml              | 2 +-
 pom.xml                           | 2 +-
 repl/pom.xml                      | 2 +-
 sql/catalyst/pom.xml              | 2 +-
 sql/core/pom.xml                  | 2 +-
 sql/hive-thriftserver/pom.xml     | 2 +-
 sql/hive/pom.xml                  | 2 +-
 streaming/pom.xml                 | 2 +-
 tools/pom.xml                     | 2 +-
 unsafe/pom.xml                    | 2 +-
 yarn/pom.xml                      | 2 +-
 30 files changed, 30 insertions(+), 30 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/2d97d7a0/assembly/pom.xml
--
diff --git a/assembly/pom.xml b/assembly/pom.xml
index 626c857..b8a821d 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
    <artifactId>spark-parent_2.10</artifactId>
-    <version>1.4.0-SNAPSHOT</version>
+    <version>1.4.0</version>
    <relativePath>../pom.xml</relativePath>
   </parent>

http://git-wip-us.apache.org/repos/asf/spark/blob/2d97d7a0/bagel/pom.xml
--
diff --git a/bagel/pom.xml b/bagel/pom.xml
index 1f3dec9..c1aa32b 100644
--- a/bagel/pom.xml
+++ b/bagel/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
    <artifactId>spark-parent_2.10</artifactId>
-    <version>1.4.0-SNAPSHOT</version>
+    <version>1.4.0</version>
    <relativePath>../pom.xml</relativePath>
   </parent>

http://git-wip-us.apache.org/repos/asf/spark/blob/2d97d7a0/core/pom.xml
--
diff --git a/core/pom.xml b/core/pom.xml
index e58efe4..a9b8b42 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
    <artifactId>spark-parent_2.10</artifactId>
-    <version>1.4.0-SNAPSHOT</version>
+    <version>1.4.0</version>
    <relativePath>../pom.xml</relativePath>
   </parent>

http://git-wip-us.apache.org/repos/asf/spark/blob/2d97d7a0/examples/pom.xml
--
diff --git a/examples/pom.xml b/examples/pom.xml
index e4efee7..38ff67d 100644
--- a/examples/pom.xml
+++ b/examples/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
    <artifactId>spark-parent_2.10</artifactId>
-    <version>1.4.0-SNAPSHOT</version>
+    <version>1.4.0</version>
    <relativePath>../pom.xml</relativePath>
   </parent>

http://git-wip-us.apache.org/repos/asf/spark/blob/2d97d7a0/external/flume-sink/pom.xml
--
diff --git a/external/flume-sink/pom.xml b/external/flume-sink/pom.xml
index 1f3e619..e8784eb 100644
--- a/external/flume-sink/pom.xml
+++ b/external/flume-sink/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
    <artifactId>spark-parent_2.10</artifactId>
-    <version>1.4.0-SNAPSHOT</version>
+    <version>1.4.0</version>
    <relativePath>../../pom.xml</relativePath>
   </parent>

http://git-wip-us.apache.org/repos/asf/spark/blob/2d97d7a0/external/flume/pom.xml
--
diff --git a/external/flume/pom.xml b/external/flume/pom.xml
index 8df7edb..1794f3e 100644
--- a/external/flume/pom.xml
+++ b/external/flume/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
spark git commit: [SPARK-7782] fixed sort arrow issue
Repository: spark
Updated Branches:
  refs/heads/master 35410614d -> e838a25bd

[SPARK-7782] fixed sort arrow issue

Current behaviour:

In spark UI
![screen shot 2015-05-27 at 3 27 51 pm](https://cloud.githubusercontent.com/assets/3919211/7837541/47d330ba-04a5-11e5-89d1-e5b11da1a513.png)

In YARN
![screen shot 2015-05-27 at 3](https://cloud.githubusercontent.com/assets/3919211/7837594/aebd1d36-04a5-11e5-8216-86e03c07d2bd.png)

In jira
![screen shot 2015-05-27 at 3_2](https://cloud.githubusercontent.com/assets/3919211/7837616/d3fedce2-04a5-11e5-9e68-960ed54e5d83.png)

Author: zuxqoj <sbshek...@gmail.com>

Closes #6437 from zuxqoj/SPARK-7782_PR and squashes the following commits:

cd068b9 [zuxqoj] [SPARK-7782] fixed sort arrow issue

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e838a25b
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e838a25b
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e838a25b

Branch: refs/heads/master
Commit: e838a25bdb5603ef05e779225704c972ce436145
Parents: 3541061
Author: zuxqoj <sbshek...@gmail.com>
Authored: Wed May 27 23:13:13 2015 -0700
Committer: Reynold Xin <r...@databricks.com>
Committed: Wed May 27 23:13:13 2015 -0700

--
 .../src/main/resources/org/apache/spark/ui/static/sorttable.js | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/e838a25b/core/src/main/resources/org/apache/spark/ui/static/sorttable.js
--
diff --git a/core/src/main/resources/org/apache/spark/ui/static/sorttable.js b/core/src/main/resources/org/apache/spark/ui/static/sorttable.js
index dbacbf1..dde6069 100644
--- a/core/src/main/resources/org/apache/spark/ui/static/sorttable.js
+++ b/core/src/main/resources/org/apache/spark/ui/static/sorttable.js
@@ -100,7 +100,7 @@ sorttable = {
           this.removeChild(document.getElementById('sorttable_sortfwdind'));
           sortrevind = document.createElement('span');
           sortrevind.id = "sorttable_sortrevind";
-          sortrevind.innerHTML = stIsIE ? '&nbsp<font face="webdings">5</font>' : '&nbsp;&#x25B4;';
+          sortrevind.innerHTML = stIsIE ? '&nbsp<font face="webdings">5</font>' : '&nbsp;&#x25BE;';
           this.appendChild(sortrevind);
           return;
         }
@@ -113,7 +113,7 @@ sorttable = {
           this.removeChild(document.getElementById('sorttable_sortrevind'));
           sortfwdind = document.createElement('span');
           sortfwdind.id = "sorttable_sortfwdind";
-          sortfwdind.innerHTML = stIsIE ? '&nbsp<font face="webdings">6</font>' : '&nbsp;&#x25BE;';
+          sortfwdind.innerHTML = stIsIE ? '&nbsp<font face="webdings">6</font>' : '&nbsp;&#x25B4;';
           this.appendChild(sortfwdind);
           return;
         }
@@ -134,7 +134,7 @@ sorttable = {
       this.className += ' sorttable_sorted';
       sortfwdind = document.createElement('span');
       sortfwdind.id = "sorttable_sortfwdind";
-      sortfwdind.innerHTML = stIsIE ? '&nbsp<font face="webdings">6</font>' : '&nbsp;&#x25BE;';
+      sortfwdind.innerHTML = stIsIE ? '&nbsp<font face="webdings">6</font>' : '&nbsp;&#x25B4;';
       this.appendChild(sortfwdind);

       // build an array to sort. This is a Schwartzian transform thing,
spark git commit: [SPARK-7782] fixed sort arrow issue
Repository: spark
Updated Branches:
  refs/heads/branch-1.4 63be026da -> bd568df22

[SPARK-7782] fixed sort arrow issue

Current behaviour:

In spark UI
![screen shot 2015-05-27 at 3 27 51 pm](https://cloud.githubusercontent.com/assets/3919211/7837541/47d330ba-04a5-11e5-89d1-e5b11da1a513.png)

In YARN
![screen shot 2015-05-27 at 3](https://cloud.githubusercontent.com/assets/3919211/7837594/aebd1d36-04a5-11e5-8216-86e03c07d2bd.png)

In jira
![screen shot 2015-05-27 at 3_2](https://cloud.githubusercontent.com/assets/3919211/7837616/d3fedce2-04a5-11e5-9e68-960ed54e5d83.png)

Author: zuxqoj <sbshek...@gmail.com>

Closes #6437 from zuxqoj/SPARK-7782_PR and squashes the following commits:

cd068b9 [zuxqoj] [SPARK-7782] fixed sort arrow issue

(cherry picked from commit e838a25bdb5603ef05e779225704c972ce436145)
Signed-off-by: Reynold Xin <r...@databricks.com>

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/bd568df2
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/bd568df2
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/bd568df2

Branch: refs/heads/branch-1.4
Commit: bd568df22445a1ca5183ce357410ef7a76f5bb81
Parents: 63be026
Author: zuxqoj <sbshek...@gmail.com>
Authored: Wed May 27 23:13:13 2015 -0700
Committer: Reynold Xin <r...@databricks.com>
Committed: Wed May 27 23:13:19 2015 -0700

--
 .../src/main/resources/org/apache/spark/ui/static/sorttable.js | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/bd568df2/core/src/main/resources/org/apache/spark/ui/static/sorttable.js
--
diff --git a/core/src/main/resources/org/apache/spark/ui/static/sorttable.js b/core/src/main/resources/org/apache/spark/ui/static/sorttable.js
index dbacbf1..dde6069 100644
--- a/core/src/main/resources/org/apache/spark/ui/static/sorttable.js
+++ b/core/src/main/resources/org/apache/spark/ui/static/sorttable.js
@@ -100,7 +100,7 @@ sorttable = {
           this.removeChild(document.getElementById('sorttable_sortfwdind'));
           sortrevind = document.createElement('span');
           sortrevind.id = "sorttable_sortrevind";
-          sortrevind.innerHTML = stIsIE ? '&nbsp<font face="webdings">5</font>' : '&nbsp;&#x25B4;';
+          sortrevind.innerHTML = stIsIE ? '&nbsp<font face="webdings">5</font>' : '&nbsp;&#x25BE;';
           this.appendChild(sortrevind);
           return;
         }
@@ -113,7 +113,7 @@ sorttable = {
           this.removeChild(document.getElementById('sorttable_sortrevind'));
           sortfwdind = document.createElement('span');
           sortfwdind.id = "sorttable_sortfwdind";
-          sortfwdind.innerHTML = stIsIE ? '&nbsp<font face="webdings">6</font>' : '&nbsp;&#x25BE;';
+          sortfwdind.innerHTML = stIsIE ? '&nbsp<font face="webdings">6</font>' : '&nbsp;&#x25B4;';
           this.appendChild(sortfwdind);
           return;
         }
@@ -134,7 +134,7 @@ sorttable = {
       this.className += ' sorttable_sorted';
       sortfwdind = document.createElement('span');
       sortfwdind.id = "sorttable_sortfwdind";
-      sortfwdind.innerHTML = stIsIE ? '&nbsp<font face="webdings">6</font>' : '&nbsp;&#x25BE;';
+      sortfwdind.innerHTML = stIsIE ? '&nbsp<font face="webdings">6</font>' : '&nbsp;&#x25B4;';
       this.appendChild(sortfwdind);

       // build an array to sort. This is a Schwartzian transform thing,
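For anyone squinting at the entities: the fix swaps `&#x25B4;` (U+25B4, small up-pointing triangle) and `&#x25BE;` (U+25BE, small down-pointing triangle) so the arrow matches the sort direction. A throwaway sketch that just prints both glyphs:
~~~
// Prints the two indicator glyphs swapped by the fix.
object SortArrowGlyphs {
  def main(args: Array[String]): Unit = {
    println("forward-sort indicator after the fix: \u25B4")  // &#x25B4;
    println("reverse-sort indicator after the fix: \u25BE")  // &#x25BE;
  }
}
~~~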
Git Push Summary
Repository: spark
Updated Tags:  refs/tags/v1.4.0-rc3 [deleted] 4983dfc87
spark git commit: [SPARK-7895] [STREAMING] [EXAMPLES] Move Kafka examples from scala-2.10/src to src
Repository: spark
Updated Branches:
  refs/heads/branch-1.4 bd568df22 -> ab62d73dd

[SPARK-7895] [STREAMING] [EXAMPLES] Move Kafka examples from scala-2.10/src to src

Since `spark-streaming-kafka` is now published for both Scala 2.10 and 2.11, we can move `KafkaWordCount` and `DirectKafkaWordCount` from `examples/scala-2.10/src/` to `examples/src/` so that they will appear in `spark-examples-***-jar` for Scala 2.11.

Author: zsxwing <zsxw...@gmail.com>

Closes #6436 from zsxwing/SPARK-7895 and squashes the following commits:

c6052f1 [zsxwing] Update examples/pom.xml
0bcfa87 [zsxwing] Fix the sleep time
b9d1256 [zsxwing] Move Kafka examples from scala-2.10/src to src

(cherry picked from commit 000df2f0d6af068bb188e81bbb207f0c2f43bf16)
Signed-off-by: Patrick Wendell <patr...@databricks.com>

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ab62d73d
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ab62d73d
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ab62d73d

Branch: refs/heads/branch-1.4
Commit: ab62d73ddb973c25de043e8e9ade7800adf244e8
Parents: bd568df
Author: zsxwing <zsxw...@gmail.com>
Authored: Thu May 28 09:04:12 2015 -0700
Committer: Patrick Wendell <patr...@databricks.com>
Committed: Thu May 28 09:04:22 2015 -0700

--
 examples/pom.xml                                |  44 +------
 .../streaming/JavaDirectKafkaWordCount.java     | 113 ---------
 .../examples/streaming/JavaKafkaWordCount.java  | 113 ---------
 .../streaming/DirectKafkaWordCount.scala        |  72 ------
 .../examples/streaming/KafkaWordCount.scala     | 103 --------
 .../streaming/JavaDirectKafkaWordCount.java     | 113 +++++++++
 .../examples/streaming/JavaKafkaWordCount.java  | 113 +++++++++
 .../streaming/DirectKafkaWordCount.scala        |  72 ++++++
 .../examples/streaming/KafkaWordCount.scala     | 103 ++++++++
 9 files changed, 406 insertions(+), 440 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/ab62d73d/examples/pom.xml
--
diff --git a/examples/pom.xml b/examples/pom.xml
index 5b04b4f..e4efee7 100644
--- a/examples/pom.xml
+++ b/examples/pom.xml
@@ -98,6 +98,11 @@
       </exclusions>
     </dependency>
     <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-streaming-kafka_${scala.binary.version}</artifactId>
+      <version>${project.version}</version>
+    </dependency>
+    <dependency>
       <groupId>org.apache.hbase</groupId>
       <artifactId>hbase-testing-util</artifactId>
       <version>${hbase.version}</version>
@@ -392,45 +397,6 @@
         </dependency>
       </dependencies>
     </profile>
-    <profile>
-      <!-- We add a source directory specific to Scala 2.10 since Kafka
-           only works with it -->
-      <id>scala-2.10</id>
-      <activation>
-        <property><name>!scala-2.11</name></property>
-      </activation>
-      <dependencies>
-        <dependency>
-          <groupId>org.apache.spark</groupId>
-          <artifactId>spark-streaming-kafka_${scala.binary.version}</artifactId>
-          <version>${project.version}</version>
-        </dependency>
-      </dependencies>
-      <build>
-        <plugins>
-          <plugin>
-            <groupId>org.codehaus.mojo</groupId>
-            <artifactId>build-helper-maven-plugin</artifactId>
-            <executions>
-              <execution>
-                <id>add-scala-sources</id>
-                <phase>generate-sources</phase>
-                <goals>
-                  <goal>add-source</goal>
-                </goals>
-                <configuration>
-                  <sources>
-                    <source>src/main/scala</source>
-                    <source>scala-2.10/src/main/scala</source>
-                    <source>scala-2.10/src/main/java</source>
-                  </sources>
-                </configuration>
-              </execution>
-            </executions>
-          </plugin>
-        </plugins>
-      </build>
-    </profile>
     <!-- Profiles that disable inclusion of certain dependencies. -->
     <profile>

http://git-wip-us.apache.org/repos/asf/spark/blob/ab62d73d/examples/scala-2.10/src/main/java/org/apache/spark/examples/streaming/JavaDirectKafkaWordCount.java
--
diff --git a/examples/scala-2.10/src/main/java/org/apache/spark/examples/streaming/JavaDirectKafkaWordCount.java b/examples/scala-2.10/src/main/java/org/apache/spark/examples/streaming/JavaDirectKafkaWordCount.java
deleted file mode 100644
index bab9f24..0000000
--- a/examples/scala-2.10/src/main/java/org/apache/spark/examples/streaming/JavaDirectKafkaWordCount.java
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the
spark git commit: [SPARK-7895] [STREAMING] [EXAMPLES] Move Kafka examples from scala-2.10/src to src
Repository: spark
Updated Branches:
  refs/heads/master e838a25bd -> 000df2f0d

[SPARK-7895] [STREAMING] [EXAMPLES] Move Kafka examples from scala-2.10/src to src

Since `spark-streaming-kafka` is now published for both Scala 2.10 and 2.11, we can move `KafkaWordCount` and `DirectKafkaWordCount` from `examples/scala-2.10/src/` to `examples/src/` so that they will appear in `spark-examples-***-jar` for Scala 2.11.

Author: zsxwing <zsxw...@gmail.com>

Closes #6436 from zsxwing/SPARK-7895 and squashes the following commits:

c6052f1 [zsxwing] Update examples/pom.xml
0bcfa87 [zsxwing] Fix the sleep time
b9d1256 [zsxwing] Move Kafka examples from scala-2.10/src to src

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/000df2f0
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/000df2f0
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/000df2f0

Branch: refs/heads/master
Commit: 000df2f0d6af068bb188e81bbb207f0c2f43bf16
Parents: e838a25
Author: zsxwing <zsxw...@gmail.com>
Authored: Thu May 28 09:04:12 2015 -0700
Committer: Patrick Wendell <patr...@databricks.com>
Committed: Thu May 28 09:04:12 2015 -0700

--
 examples/pom.xml                                |  44 +------
 .../streaming/JavaDirectKafkaWordCount.java     | 113 ---------
 .../examples/streaming/JavaKafkaWordCount.java  | 113 ---------
 .../streaming/DirectKafkaWordCount.scala        |  72 ------
 .../examples/streaming/KafkaWordCount.scala     | 103 --------
 .../streaming/JavaDirectKafkaWordCount.java     | 113 +++++++++
 .../examples/streaming/JavaKafkaWordCount.java  | 113 +++++++++
 .../streaming/DirectKafkaWordCount.scala        |  72 ++++++
 .../examples/streaming/KafkaWordCount.scala     | 103 ++++++++
 9 files changed, 406 insertions(+), 440 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/000df2f0/examples/pom.xml
--
diff --git a/examples/pom.xml b/examples/pom.xml
index 5b04b4f..e4efee7 100644
--- a/examples/pom.xml
+++ b/examples/pom.xml
@@ -98,6 +98,11 @@
       </exclusions>
     </dependency>
     <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-streaming-kafka_${scala.binary.version}</artifactId>
+      <version>${project.version}</version>
+    </dependency>
+    <dependency>
       <groupId>org.apache.hbase</groupId>
       <artifactId>hbase-testing-util</artifactId>
       <version>${hbase.version}</version>
@@ -392,45 +397,6 @@
         </dependency>
       </dependencies>
     </profile>
-    <profile>
-      <!-- We add a source directory specific to Scala 2.10 since Kafka
-           only works with it -->
-      <id>scala-2.10</id>
-      <activation>
-        <property><name>!scala-2.11</name></property>
-      </activation>
-      <dependencies>
-        <dependency>
-          <groupId>org.apache.spark</groupId>
-          <artifactId>spark-streaming-kafka_${scala.binary.version}</artifactId>
-          <version>${project.version}</version>
-        </dependency>
-      </dependencies>
-      <build>
-        <plugins>
-          <plugin>
-            <groupId>org.codehaus.mojo</groupId>
-            <artifactId>build-helper-maven-plugin</artifactId>
-            <executions>
-              <execution>
-                <id>add-scala-sources</id>
-                <phase>generate-sources</phase>
-                <goals>
-                  <goal>add-source</goal>
-                </goals>
-                <configuration>
-                  <sources>
-                    <source>src/main/scala</source>
-                    <source>scala-2.10/src/main/scala</source>
-                    <source>scala-2.10/src/main/java</source>
-                  </sources>
-                </configuration>
-              </execution>
-            </executions>
-          </plugin>
-        </plugins>
-      </build>
-    </profile>
     <!-- Profiles that disable inclusion of certain dependencies. -->
     <profile>

http://git-wip-us.apache.org/repos/asf/spark/blob/000df2f0/examples/scala-2.10/src/main/java/org/apache/spark/examples/streaming/JavaDirectKafkaWordCount.java
--
diff --git a/examples/scala-2.10/src/main/java/org/apache/spark/examples/streaming/JavaDirectKafkaWordCount.java b/examples/scala-2.10/src/main/java/org/apache/spark/examples/streaming/JavaDirectKafkaWordCount.java
deleted file mode 100644
index bab9f24..0000000
--- a/examples/scala-2.10/src/main/java/org/apache/spark/examples/streaming/JavaDirectKafkaWordCount.java
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to
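The archive truncates the moved files here, but the relocated `DirectKafkaWordCount` example boils down to the pattern below. This is a condensed sketch, not the file itself; the broker address `localhost:9092` and topic name `test` are assumed placeholders.
~~~
import kafka.serializer.StringDecoder
import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.streaming.kafka.KafkaUtils

object DirectKafkaSketch {
  def main(args: Array[String]): Unit = {
    val ssc = new StreamingContext(
      new SparkConf().setAppName("DirectKafkaSketch").setMaster("local[2]"), Seconds(2))

    // Direct stream: no receiver, offsets tracked by the stream itself.
    val kafkaParams = Map("metadata.broker.list" -> "localhost:9092")
    val messages = KafkaUtils.createDirectStream[String, String, StringDecoder, StringDecoder](
      ssc, kafkaParams, Set("test"))

    // Classic word count over the message values.
    messages.map(_._2)
      .flatMap(_.split(" "))
      .map((_, 1L))
      .reduceByKey(_ + _)
      .print()

    ssc.start()
    ssc.awaitTermination()
  }
}
~~~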