spark git commit: Small update in the readme file
Repository: spark Updated Branches: refs/heads/master 0e194645f - 57c72fcce Small update in the readme file Just change the attribute from -PsparkR to -Psparkr Author: Dirceu Semighini Filho dirceu.semigh...@gmail.com Closes #7242 from dirceusemighini/patch-1 and squashes the following commits: fad5991 [Dirceu Semighini Filho] Small update in the readme file Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/57c72fcc Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/57c72fcc Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/57c72fcc Branch: refs/heads/master Commit: 57c72fcce75907c08a1ae53a0d85447176fc3c69 Parents: 0e19464 Author: Dirceu Semighini Filho dirceu.semigh...@gmail.com Authored: Mon Jul 6 13:28:07 2015 -0700 Committer: Reynold Xin r...@databricks.com Committed: Mon Jul 6 13:28:07 2015 -0700 -- R/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/57c72fcc/R/README.md -- diff --git a/R/README.md b/R/README.md index d7d65b4..005f56d 100644 --- a/R/README.md +++ b/R/README.md @@ -6,7 +6,7 @@ SparkR is an R package that provides a light-weight frontend to use Spark from R Build Spark -Build Spark with [Maven](http://spark.apache.org/docs/latest/building-spark.html#building-with-buildmvn) and include the `-PsparkR` profile to build the R package. For example to use the default Hadoop versions you can run +Build Spark with [Maven](http://spark.apache.org/docs/latest/building-spark.html#building-with-buildmvn) and include the `-Psparkr` profile to build the R package. For example to use the default Hadoop versions you can run ``` build/mvn -DskipTests -Psparkr package ``` - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-8784] [SQL] Add Python API for hex and unhex
Repository: spark Updated Branches: refs/heads/master 57c72fcce - 37e4d9214 [SPARK-8784] [SQL] Add Python API for hex and unhex Add Python API for hex/unhex, also cleanup Hex/Unhex Author: Davies Liu dav...@databricks.com Closes #7223 from davies/hex and squashes the following commits: 6f1249d [Davies Liu] no explicit rule to cast string into binary 711a6ed [Davies Liu] fix test f9fe5a3 [Davies Liu] Merge branch 'master' of github.com:apache/spark into hex f032fbb [Davies Liu] Merge branch 'hex' of github.com:davies/spark into hex 49e325f [Davies Liu] Merge branch 'master' of github.com:apache/spark into hex b31fc9a [Davies Liu] Update math.scala 25156b7 [Davies Liu] address comments and fix test c3af78c [Davies Liu] address commments 1a24082 [Davies Liu] Add Python API for hex and unhex Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/37e4d921 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/37e4d921 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/37e4d921 Branch: refs/heads/master Commit: 37e4d92142a6309e2df7d36883e0c7892c3d792d Parents: 57c72fc Author: Davies Liu dav...@databricks.com Authored: Mon Jul 6 13:31:31 2015 -0700 Committer: Reynold Xin r...@databricks.com Committed: Mon Jul 6 13:31:31 2015 -0700 -- python/pyspark/sql/functions.py | 28 +++ .../catalyst/analysis/FunctionRegistry.scala| 2 +- .../spark/sql/catalyst/expressions/math.scala | 83 +++- .../expressions/MathFunctionsSuite.scala| 25 -- .../scala/org/apache/spark/sql/functions.scala | 2 +- 5 files changed, 93 insertions(+), 47 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/37e4d921/python/pyspark/sql/functions.py -- diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index 49dd033..dca39fa 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -397,6 +397,34 @@ def randn(seed=None): @ignore_unicode_prefix @since(1.5) +def hex(col): +Computes hex value of the given column, which could be StringType, +BinaryType, IntegerType or LongType. + + sqlContext.createDataFrame([('ABC', 3)], ['a', 'b']).select(hex('a'), hex('b')).collect() +[Row(hex(a)=u'414243', hex(b)=u'3')] + +sc = SparkContext._active_spark_context +jc = sc._jvm.functions.hex(_to_java_column(col)) +return Column(jc) + + +@ignore_unicode_prefix +@since(1.5) +def unhex(col): +Inverse of hex. Interprets each pair of characters as a hexadecimal number +and converts to the byte representation of number. + + sqlContext.createDataFrame([('414243',)], ['a']).select(unhex('a')).collect() +[Row(unhex(a)=bytearray(b'ABC'))] + +sc = SparkContext._active_spark_context +jc = sc._jvm.functions.unhex(_to_java_column(col)) +return Column(jc) + + +@ignore_unicode_prefix +@since(1.5) def sha1(col): Returns the hex string result of SHA-1. 
http://git-wip-us.apache.org/repos/asf/spark/blob/37e4d921/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index 92a50e7..fef2763 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -168,7 +168,7 @@ object FunctionRegistry { expression[Substring](substring), expression[UnBase64](unbase64), expression[Upper](ucase), -expression[UnHex](unhex), +expression[Unhex](unhex), expression[Upper](upper), // datetime functions http://git-wip-us.apache.org/repos/asf/spark/blob/37e4d921/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/math.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/math.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/math.scala index 45b7e4d..9250045 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/math.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/math.scala @@ -298,6 +298,21 @@ case class Bin(child: Expression) } } +object Hex { + val hexDigits = Array[Char]( +'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' + ).map(_.toByte) + + // lookup table to translate '0' - 0 ...
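For readers of this digest, the new functions are also usable from Scala through `org.apache.spark.sql.functions`; a minimal round-trip sketch, assuming a live SQLContext named `sqlContext` and illustrative column names:

```scala
import org.apache.spark.sql.functions.{hex, unhex}
import sqlContext.implicits._   // assumes an existing SQLContext named sqlContext

val df = Seq(("ABC", 3)).toDF("a", "b")
df.select(hex($"a"), hex($"b")).show()   // hex("ABC") -> 414243, hex(3) -> 3
df.select(unhex(hex($"a"))).show()       // unhex inverts hex: back to the bytes of "ABC"
```

The lookup-table idea behind the consolidated `Hex` object can also be sketched standalone; the names below are illustrative, not the actual Spark internals:

```scala
object HexSketch {
  private val hexDigits = "0123456789ABCDEF".toCharArray

  // Encode each byte as two hex characters via the digit lookup table.
  def toHex(bytes: Array[Byte]): String = {
    val out = new StringBuilder(bytes.length * 2)
    bytes.foreach { b =>
      out.append(hexDigits((b >>> 4) & 0x0f))   // high nibble
      out.append(hexDigits(b & 0x0f))           // low nibble
    }
    out.toString
  }

  def main(args: Array[String]): Unit = {
    println(toHex("ABC".getBytes("UTF-8")))     // prints 414243
  }
}
```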
spark git commit: [SPARK-4485] [SQL] (1) Add broadcast hash outer join, (2) Fix SparkPlanTest
Repository: spark Updated Branches: refs/heads/master 37e4d9214 -> 2471c0bf7 [SPARK-4485] [SQL] (1) Add broadcast hash outer join, (2) Fix SparkPlanTest This pull request (1) extracts common functions used by hash outer joins and puts them in the HashOuterJoin interface, (2) adds ShuffledHashOuterJoin and BroadcastHashOuterJoin, (3) adds test cases for shuffled and broadcast hash outer join, and (4) makes SparkPlanTest support binary and more complex operators, fixing bugs in plan composition in SparkPlanTest. Author: kai kaiz...@eecs.berkeley.edu Closes #7162 from kai-zeng/outer and squashes the following commits: 3742359 [kai] Fix not-serializable exception for code-generated keys in broadcasted relations 14e4bf8 [kai] Use CanBroadcast in broadcast outer join planning dc5127e [kai] code style fixes b5a4efa [kai] (1) Add broadcast hash outer join, (2) Fix SparkPlanTest Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2471c0bf Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2471c0bf Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2471c0bf Branch: refs/heads/master Commit: 2471c0bf7f463bb144b44a2e51c0f363e71e099d Parents: 37e4d92 Author: kai kaiz...@eecs.berkeley.edu Authored: Mon Jul 6 14:33:30 2015 -0700 Committer: Michael Armbrust mich...@databricks.com Committed: Mon Jul 6 14:33:30 2015 -0700 -- .../spark/sql/execution/SparkStrategies.scala | 12 +- .../joins/BroadcastHashOuterJoin.scala | 121 +++ .../sql/execution/joins/HashOuterJoin.scala | 95 --- .../execution/joins/ShuffledHashOuterJoin.scala | 85 + .../scala/org/apache/spark/sql/JoinSuite.scala | 40 +- .../spark/sql/execution/SparkPlanTest.scala | 99 --- .../sql/execution/joins/OuterJoinSuite.scala| 88 ++ 7 files changed, 441 insertions(+), 99 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/2471c0bf/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala index 5daf86d..3204498 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala @@ -117,8 +117,18 @@ private[sql] abstract class SparkStrategies extends QueryPlanner[SparkPlan] { leftKeys, rightKeys, buildSide, planLater(left), planLater(right)) condition.map(Filter(_, hashJoin)).getOrElse(hashJoin) :: Nil + case ExtractEquiJoinKeys( + LeftOuter, leftKeys, rightKeys, condition, left, CanBroadcast(right)) => +joins.BroadcastHashOuterJoin( + leftKeys, rightKeys, LeftOuter, condition, planLater(left), planLater(right)) :: Nil + + case ExtractEquiJoinKeys( + RightOuter, leftKeys, rightKeys, condition, CanBroadcast(left), right) => +joins.BroadcastHashOuterJoin( + leftKeys, rightKeys, RightOuter, condition, planLater(left), planLater(right)) :: Nil + case ExtractEquiJoinKeys(joinType, leftKeys, rightKeys, condition, left, right) => -joins.HashOuterJoin( +joins.ShuffledHashOuterJoin( leftKeys, rightKeys, joinType, condition, planLater(left), planLater(right)) :: Nil case _ => Nil http://git-wip-us.apache.org/repos/asf/spark/blob/2471c0bf/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/BroadcastHashOuterJoin.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/BroadcastHashOuterJoin.scala
b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/BroadcastHashOuterJoin.scala new file mode 100644 index 000..5da04c7 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/BroadcastHashOuterJoin.scala @@ -0,0 +1,121 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the License); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + *
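In user terms, the new planner rules pick the broadcast variant of an outer join whenever the non-outer side is small enough to broadcast. A hedged sketch of observing that choice, assuming two DataFrames `largeDf` and `smallDf` that share a `key` column and a live SQLContext:

```scala
// Whether BroadcastHashOuterJoin or ShuffledHashOuterJoin is planned hinges on
// the CanBroadcast extractor, i.e. spark.sql.autoBroadcastJoinThreshold.
sqlContext.setConf("spark.sql.autoBroadcastJoinThreshold", (10 * 1024 * 1024).toString)

val joined = largeDf.join(smallDf, largeDf("key") === smallDf("key"), "left_outer")
joined.explain()  // with a small right side, expect BroadcastHashOuterJoin in the plan
```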
spark git commit: [SPARK-8841] [SQL] Fix partition pruning percentage log message
Repository: spark Updated Branches: refs/heads/master 86768b7b3 -> 39e4e7e4d [SPARK-8841] [SQL] Fix partition pruning percentage log message When pruning partitions for a query plan, a message is logged indicating how many partitions were selected based on predicate criteria, and what percent were pruned. The current release erroneously uses `1 - total/selected` to compute this quantity, leading to nonsense messages like "pruned -1000% partitions". The fix is simple and obvious. Author: Steve Lindemann steve.lindem...@engineersgatelp.com Closes #7227 from srlindemann/master and squashes the following commits: c788061 [Steve Lindemann] fix percentPruned log message Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/39e4e7e4 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/39e4e7e4 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/39e4e7e4 Branch: refs/heads/master Commit: 39e4e7e4d89077a637c4cad3a986e0e3447d1ae7 Parents: 86768b7 Author: Steve Lindemann steve.lindem...@engineersgatelp.com Authored: Mon Jul 6 10:17:05 2015 -0700 Committer: Cheng Lian l...@databricks.com Committed: Mon Jul 6 10:17:05 2015 -0700 -- .../scala/org/apache/spark/sql/sources/DataSourceStrategy.scala| 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/39e4e7e4/sql/core/src/main/scala/org/apache/spark/sql/sources/DataSourceStrategy.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/sources/DataSourceStrategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/sources/DataSourceStrategy.scala index ce16e05..66f7ba9 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/sources/DataSourceStrategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/sources/DataSourceStrategy.scala @@ -65,7 +65,7 @@ private[sql] object DataSourceStrategy extends Strategy with Logging { logInfo { val total = t.partitionSpec.partitions.length val selected = selectedPartitions.length -val percentPruned = (1 - total.toDouble / selected.toDouble) * 100 +val percentPruned = (1 - selected.toDouble / total.toDouble) * 100 s"Selected $selected partitions out of $total, pruned $percentPruned% partitions." } - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
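The sign error is easiest to see with concrete numbers; a quick worked check of both formulas:

```scala
// With 1000 total partitions and 20 selected:
val total = 1000.0
val selected = 20.0
val buggy = (1 - total / selected) * 100   // -4900.0 -> "pruned -4900.0% partitions"
val fixed = (1 - selected / total) * 100   //    98.0 -> "pruned 98.0% partitions"
```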
spark git commit: [SPARK-8765] [MLLIB] Fix PySpark PowerIterationClustering test issue
Repository: spark Updated Branches: refs/heads/master 96c5eeec3 -> 0effe180f [SPARK-8765] [MLLIB] Fix PySpark PowerIterationClustering test issue The PySpark PowerIterationClustering test failure was due to bad demo data: if the data set is too small, PowerIterationClustering behaves nondeterministically. Author: Yanbo Liang yblia...@gmail.com Closes #7177 from yanboliang/spark-8765 and squashes the following commits: 392ae54 [Yanbo Liang] fix model.assignments output 5ec3f1e [Yanbo Liang] fix PySpark PowerIterationClustering test issue Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0effe180 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0effe180 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0effe180 Branch: refs/heads/master Commit: 0effe180f4c2cf37af1012b33b43912bdecaf756 Parents: 96c5eee Author: Yanbo Liang yblia...@gmail.com Authored: Mon Jul 6 16:15:12 2015 -0700 Committer: Xiangrui Meng m...@databricks.com Committed: Mon Jul 6 16:15:12 2015 -0700 -- python/pyspark/mllib/clustering.py | 16 ++-- 1 file changed, 14 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/0effe180/python/pyspark/mllib/clustering.py -- diff --git a/python/pyspark/mllib/clustering.py b/python/pyspark/mllib/clustering.py index a3eab63..ed4d78a 100644 --- a/python/pyspark/mllib/clustering.py +++ b/python/pyspark/mllib/clustering.py @@ -282,18 +282,30 @@ class PowerIterationClusteringModel(JavaModelWrapper, JavaSaveable, JavaLoader): Model produced by [[PowerIterationClustering]]. - >>> data = [(0, 1, 1.0), (0, 2, 1.0), (1, 3, 1.0), (2, 3, 1.0), -... (0, 3, 1.0), (1, 2, 1.0), (0, 4, 0.1)] + >>> data = [(0, 1, 1.0), (0, 2, 1.0), (0, 3, 1.0), (1, 2, 1.0), (1, 3, 1.0), +... (2, 3, 1.0), (3, 4, 0.1), (4, 5, 1.0), (4, 15, 1.0), (5, 6, 1.0), +... (6, 7, 1.0), (7, 8, 1.0), (8, 9, 1.0), (9, 10, 1.0), (10, 11, 1.0), +... (11, 12, 1.0), (12, 13, 1.0), (13, 14, 1.0), (14, 15, 1.0)] >>> rdd = sc.parallelize(data, 2) >>> model = PowerIterationClustering.train(rdd, 2, 100) >>> model.k 2 + >>> result = sorted(model.assignments().collect(), key=lambda x: x.id) + >>> result[0].cluster == result[1].cluster == result[2].cluster == result[3].cluster +True + >>> result[4].cluster == result[5].cluster == result[6].cluster == result[7].cluster +True >>> import os, tempfile >>> path = tempfile.mkdtemp() >>> model.save(sc, path) >>> sameModel = PowerIterationClusteringModel.load(sc, path) >>> sameModel.k 2 + >>> result = sorted(model.assignments().collect(), key=lambda x: x.id) + >>> result[0].cluster == result[1].cluster == result[2].cluster == result[3].cluster +True + >>> result[4].cluster == result[5].cluster == result[6].cluster == result[7].cluster +True >>> from shutil import rmtree >>> try: ... rmtree(path) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
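The same sensitivity exists in the Scala API; a hedged sketch with a similarity graph large enough to split cleanly into two clusters (edge weights are made up, and an active SparkContext `sc` is assumed):

```scala
import org.apache.spark.mllib.clustering.PowerIterationClustering

// Two dense triangles joined by one weak edge: a clearly separable 2-cluster graph.
val similarities = sc.parallelize(Seq(
  (0L, 1L, 1.0), (0L, 2L, 1.0), (1L, 2L, 1.0),  // group A
  (3L, 4L, 1.0), (4L, 5L, 1.0), (3L, 5L, 1.0),  // group B
  (2L, 3L, 0.1)                                 // weak bridge between the groups
))
val model = new PowerIterationClustering()
  .setK(2)
  .setMaxIterations(100)
  .run(similarities)
model.assignments.collect().sortBy(_.id).foreach(a => println(s"${a.id} -> ${a.cluster}"))
```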
spark git commit: [SPARK-8463][SQL] Use DriverRegistry to load jdbc driver at writing path
Repository: spark Updated Branches: refs/heads/branch-1.4 e990561ce - 4d813833d [SPARK-8463][SQL] Use DriverRegistry to load jdbc driver at writing path JIRA: https://issues.apache.org/jira/browse/SPARK-8463 Currently, at the reading path, `DriverRegistry` is used to load needed jdbc driver at executors. However, at the writing path, we also need `DriverRegistry` to load jdbc driver. Author: Liang-Chi Hsieh vii...@gmail.com Closes #6900 from viirya/jdbc_write_driver and squashes the following commits: 16cd04b [Liang-Chi Hsieh] Use DriverRegistry to load jdbc driver at writing path. (cherry picked from commit d4d6d31db5cc5c69ac369f754b7489f444c9ba2f) Signed-off-by: Reynold Xin r...@databricks.com Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4d813833 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4d813833 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4d813833 Branch: refs/heads/branch-1.4 Commit: 4d813833df57a8a75df58aadabe061acd114431d Parents: e990561 Author: Liang-Chi Hsieh vii...@gmail.com Authored: Mon Jul 6 17:16:44 2015 -0700 Committer: Reynold Xin r...@databricks.com Committed: Mon Jul 6 17:17:15 2015 -0700 -- .../src/main/scala/org/apache/spark/sql/jdbc/jdbc.scala | 11 ++- 1 file changed, 6 insertions(+), 5 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/4d813833/sql/core/src/main/scala/org/apache/spark/sql/jdbc/jdbc.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/jdbc.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/jdbc.scala index dd8aaf6..f7ea852 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/jdbc.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/jdbc.scala @@ -58,13 +58,12 @@ package object jdbc { * are used. */ def savePartition( -url: String, +getConnection: () = Connection, table: String, iterator: Iterator[Row], rddSchema: StructType, -nullTypes: Array[Int], -properties: Properties): Iterator[Byte] = { - val conn = DriverManager.getConnection(url, properties) +nullTypes: Array[Int]): Iterator[Byte] = { + val conn = getConnection() var committed = false try { conn.setAutoCommit(false) // Everything in the same db transaction. @@ -185,8 +184,10 @@ package object jdbc { } val rddSchema = df.schema + val driver: String = DriverRegistry.getDriverClassName(url) + val getConnection: () = Connection = JDBCRDD.getConnector(driver, url, properties) df.foreachPartition { iterator = -JDBCWriteDetails.savePartition(url, table, iterator, rddSchema, nullTypes, properties) +JDBCWriteDetails.savePartition(getConnection, table, iterator, rddSchema, nullTypes) } } - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-8463][SQL] Use DriverRegistry to load jdbc driver at writing path
Repository: spark Updated Branches: refs/heads/master 09a06418d - d4d6d31db [SPARK-8463][SQL] Use DriverRegistry to load jdbc driver at writing path JIRA: https://issues.apache.org/jira/browse/SPARK-8463 Currently, at the reading path, `DriverRegistry` is used to load needed jdbc driver at executors. However, at the writing path, we also need `DriverRegistry` to load jdbc driver. Author: Liang-Chi Hsieh vii...@gmail.com Closes #6900 from viirya/jdbc_write_driver and squashes the following commits: 16cd04b [Liang-Chi Hsieh] Use DriverRegistry to load jdbc driver at writing path. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d4d6d31d Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d4d6d31d Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d4d6d31d Branch: refs/heads/master Commit: d4d6d31db5cc5c69ac369f754b7489f444c9ba2f Parents: 09a0641 Author: Liang-Chi Hsieh vii...@gmail.com Authored: Mon Jul 6 17:16:44 2015 -0700 Committer: Reynold Xin r...@databricks.com Committed: Mon Jul 6 17:16:44 2015 -0700 -- .../src/main/scala/org/apache/spark/sql/jdbc/jdbc.scala | 11 ++- 1 file changed, 6 insertions(+), 5 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/d4d6d31d/sql/core/src/main/scala/org/apache/spark/sql/jdbc/jdbc.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/jdbc.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/jdbc.scala index dd8aaf6..f7ea852 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/jdbc.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/jdbc.scala @@ -58,13 +58,12 @@ package object jdbc { * are used. */ def savePartition( -url: String, +getConnection: () = Connection, table: String, iterator: Iterator[Row], rddSchema: StructType, -nullTypes: Array[Int], -properties: Properties): Iterator[Byte] = { - val conn = DriverManager.getConnection(url, properties) +nullTypes: Array[Int]): Iterator[Byte] = { + val conn = getConnection() var committed = false try { conn.setAutoCommit(false) // Everything in the same db transaction. @@ -185,8 +184,10 @@ package object jdbc { } val rddSchema = df.schema + val driver: String = DriverRegistry.getDriverClassName(url) + val getConnection: () = Connection = JDBCRDD.getConnector(driver, url, properties) df.foreachPartition { iterator = -JDBCWriteDetails.savePartition(url, table, iterator, rddSchema, nullTypes, properties) +JDBCWriteDetails.savePartition(getConnection, table, iterator, rddSchema, nullTypes) } } - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
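The essence of the change is to ship a serializable connection factory (a `() => Connection`) into the executor closures, so the JDBC driver class is loaded where the writes actually run instead of opening a connection on the Spark driver. A simplified sketch of the pattern, not Spark's actual helpers, assuming an RDD `rowRdd` and the H2 driver on the executor classpath:

```scala
import java.sql.{Connection, DriverManager}

// A serializable connection factory: the driver class is loaded lazily on
// whichever executor JVM invokes it, not on the Spark driver.
def connectionFactory(driverClass: String, url: String): () => Connection = () => {
  Class.forName(driverClass)   // registers the JDBC driver with DriverManager in this JVM
  DriverManager.getConnection(url)
}

val getConnection = connectionFactory("org.h2.Driver", "jdbc:h2:mem:demo")
rowRdd.foreachPartition { rows =>
  val conn = getConnection()
  try {
    rows.foreach { row => /* write row over conn */ }
  } finally {
    conn.close()
  }
}
```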
spark git commit: [SPARK-6707] [CORE] [MESOS] Mesos Scheduler should allow the user to specify constraints based on slave attributes
Repository: spark Updated Branches: refs/heads/master 9ff203346 - 1165b17d2 [SPARK-6707] [CORE] [MESOS] Mesos Scheduler should allow the user to specify constraints based on slave attributes Currently, the mesos scheduler only looks at the 'cpu' and 'mem' resources when trying to determine the usablility of a resource offer from a mesos slave node. It may be preferable for the user to be able to ensure that the spark jobs are only started on a certain set of nodes (based on attributes). For example, If the user sets a property, let's say `spark.mesos.constraints` is set to `tachyon=true;us-east-1=false`, then the resource offers will be checked to see if they meet both these constraints and only then will be accepted to start new executors. Author: Ankur Chauhan achau...@brightcove.com Closes #5563 from ankurcha/mesos_attribs and squashes the following commits: 902535b [Ankur Chauhan] Fix line length d83801c [Ankur Chauhan] Update code as per code review comments 8b73f2d [Ankur Chauhan] Fix imports c3523e7 [Ankur Chauhan] Added docs 1a24d0b [Ankur Chauhan] Expand scope of attributes matching to include all data types 482fd71 [Ankur Chauhan] Update access modifier to private[this] for offer constraints 5ccc32d [Ankur Chauhan] Fix nit pick whitespace 1bce782 [Ankur Chauhan] Fix nit pick whitespace c0cbc75 [Ankur Chauhan] Use offer id value for debug message 7fee0ea [Ankur Chauhan] Add debug statements fc7eb5b [Ankur Chauhan] Fix import codestyle 00be252 [Ankur Chauhan] Style changes as per code review comments 662535f [Ankur Chauhan] Incorporate code review comments + use SparkFunSuite fdc0937 [Ankur Chauhan] Decline offers that did not meet criteria 67b58a0 [Ankur Chauhan] Add documentation for spark.mesos.constraints 63f53f4 [Ankur Chauhan] Update codestyle - uniform style for config values 02031e4 [Ankur Chauhan] Fix scalastyle warnings in tests c09ed84 [Ankur Chauhan] Fixed the access modifier on offerConstraints val to private[mesos] 0c64df6 [Ankur Chauhan] Rename overhead fractions to memory_*, fix spacing 8cc1e8f [Ankur Chauhan] Make exception message more explicit about the source of the error addedba [Ankur Chauhan] Added test case for malformed constraint string ec9d9a6 [Ankur Chauhan] Add tests for parse constraint string 72fe88a [Ankur Chauhan] Fix up tests + remove redundant method override, combine utility class into new mesos scheduler util trait 92b47fd [Ankur Chauhan] Add attributes based constraints support to MesosScheduler Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1165b17d Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1165b17d Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1165b17d Branch: refs/heads/master Commit: 1165b17d24cdf1dbebb2faca14308dfe5c2a652c Parents: 9ff2033 Author: Ankur Chauhan achau...@brightcove.com Authored: Mon Jul 6 16:04:57 2015 -0700 Committer: Andrew Or and...@databricks.com Committed: Mon Jul 6 16:04:57 2015 -0700 -- .../mesos/CoarseMesosSchedulerBackend.scala | 43 +++--- .../scheduler/cluster/mesos/MemoryUtils.scala | 31 .../cluster/mesos/MesosClusterScheduler.scala | 1 + .../cluster/mesos/MesosSchedulerBackend.scala | 62 +--- .../cluster/mesos/MesosSchedulerUtils.scala | 153 ++- .../cluster/mesos/MemoryUtilsSuite.scala| 46 -- .../mesos/MesosSchedulerBackendSuite.scala | 6 +- .../mesos/MesosSchedulerUtilsSuite.scala| 140 + docs/running-on-mesos.md| 22 +++ 9 files changed, 376 insertions(+), 128 deletions(-) -- 
http://git-wip-us.apache.org/repos/asf/spark/blob/1165b17d/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/CoarseMesosSchedulerBackend.scala -- diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/CoarseMesosSchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/CoarseMesosSchedulerBackend.scala index 6b8edca..b68f8c7 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/CoarseMesosSchedulerBackend.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/CoarseMesosSchedulerBackend.scala @@ -18,18 +18,18 @@ package org.apache.spark.scheduler.cluster.mesos import java.io.File -import java.util.{Collections, List = JList} +import java.util.{List = JList} import scala.collection.JavaConversions._ import scala.collection.mutable.{HashMap, HashSet} -import org.apache.mesos.Protos.{TaskInfo = MesosTaskInfo, _} import org.apache.mesos.{Scheduler = MScheduler, _} +import org.apache.mesos.Protos.{TaskInfo = MesosTaskInfo, _} +import org.apache.spark.{SparkContext, SparkEnv, SparkException, TaskState} import
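Per the commit message, a constraint spec is a semicolon-separated list of attribute=value pairs. A simplified parsing sketch follows; the real logic in MesosSchedulerUtils additionally handles Mesos set-, range-, and scalar-valued attributes:

```scala
def parseConstraintString(spec: String): Map[String, String] =
  spec.split(';').filter(_.nonEmpty).map { pair =>
    pair.split("=", 2) match {
      case Array(attr, value) => attr -> value
      case Array(attr)        => attr -> ""   // attribute only needs to be present
    }
  }.toMap

println(parseConstraintString("tachyon=true;us-east-1=false"))
// Map(tachyon -> true, us-east-1 -> false)
```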
spark git commit: [SPARK-8656] [WEBUI] Fix the webUI and JSON API number is not synced
Repository: spark Updated Branches: refs/heads/master 132e7fca1 - 9ff203346 [SPARK-8656] [WEBUI] Fix the webUI and JSON API number is not synced Spark standalone master web UI show Alive Workers total core, total used cores and Alive workers total memory, memory used. But the JSON API page http://MASTERURL:8088/json; shows ALL workers core, memory number. This webUI data is not sync with the JSON API. The proper way is to sync the number with webUI and JSON API. Author: Wisely Chen wiselyc...@appier.com Closes #7038 from thegiive/SPARK-8656 and squashes the following commits: 9e54bf0 [Wisely Chen] Change variable name to camel case 2c8ea89 [Wisely Chen] Change some styling and add local variable 431d2b0 [Wisely Chen] Worker List should contain DEAD node also 8b3b8e8 [Wisely Chen] [SPARK-8656] Fix the webUI and JSON API number is not synced Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/9ff20334 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/9ff20334 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/9ff20334 Branch: refs/heads/master Commit: 9ff203346ca4decf2999e33bfb8c400ec75313e6 Parents: 132e7fc Author: Wisely Chen wiselyc...@appier.com Authored: Mon Jul 6 16:04:01 2015 -0700 Committer: Andrew Or and...@databricks.com Committed: Mon Jul 6 16:04:01 2015 -0700 -- .../main/scala/org/apache/spark/deploy/JsonProtocol.scala | 9 + .../scala/org/apache/spark/deploy/master/WorkerInfo.scala | 2 ++ 2 files changed, 7 insertions(+), 4 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/9ff20334/core/src/main/scala/org/apache/spark/deploy/JsonProtocol.scala -- diff --git a/core/src/main/scala/org/apache/spark/deploy/JsonProtocol.scala b/core/src/main/scala/org/apache/spark/deploy/JsonProtocol.scala index 2954f93..ccffb36 100644 --- a/core/src/main/scala/org/apache/spark/deploy/JsonProtocol.scala +++ b/core/src/main/scala/org/apache/spark/deploy/JsonProtocol.scala @@ -76,12 +76,13 @@ private[deploy] object JsonProtocol { } def writeMasterState(obj: MasterStateResponse): JObject = { +val aliveWorkers = obj.workers.filter(_.isAlive()) (url - obj.uri) ~ (workers - obj.workers.toList.map(writeWorkerInfo)) ~ -(cores - obj.workers.map(_.cores).sum) ~ -(coresused - obj.workers.map(_.coresUsed).sum) ~ -(memory - obj.workers.map(_.memory).sum) ~ -(memoryused - obj.workers.map(_.memoryUsed).sum) ~ +(cores - aliveWorkers.map(_.cores).sum) ~ +(coresused - aliveWorkers.map(_.coresUsed).sum) ~ +(memory - aliveWorkers.map(_.memory).sum) ~ +(memoryused - aliveWorkers.map(_.memoryUsed).sum) ~ (activeapps - obj.activeApps.toList.map(writeApplicationInfo)) ~ (completedapps - obj.completedApps.toList.map(writeApplicationInfo)) ~ (activedrivers - obj.activeDrivers.toList.map(writeDriverInfo)) ~ http://git-wip-us.apache.org/repos/asf/spark/blob/9ff20334/core/src/main/scala/org/apache/spark/deploy/master/WorkerInfo.scala -- diff --git a/core/src/main/scala/org/apache/spark/deploy/master/WorkerInfo.scala b/core/src/main/scala/org/apache/spark/deploy/master/WorkerInfo.scala index 4718110..f751966 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/WorkerInfo.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/WorkerInfo.scala @@ -105,4 +105,6 @@ private[spark] class WorkerInfo( def setState(state: WorkerState.Value): Unit = { this.state = state } + + def isAlive(): Boolean = this.state == WorkerState.ALIVE } - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: 
commits-h...@spark.apache.org
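The fix follows a filter-then-aggregate pattern: resource totals are summed over only the ALIVE workers, so the JSON endpoint agrees with the web UI. A minimal sketch with made-up numbers and illustrative field names:

```scala
case class WorkerStats(cores: Int, coresUsed: Int, alive: Boolean)

val workers = Seq(
  WorkerStats(cores = 8, coresUsed = 4, alive = true),
  WorkerStats(cores = 8, coresUsed = 0, alive = false)  // a DEAD worker
)
val aliveWorkers = workers.filter(_.alive)

println(workers.map(_.cores).sum)       // 16: the old JSON value, counting DEAD nodes
println(aliveWorkers.map(_.cores).sum)  // 8:  the fixed value, matching the web UI
```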
spark git commit: [SPARK-8588] [SQL] Regression test
Repository: spark Updated Branches: refs/heads/master 0effe180f - 7b467cc93 [SPARK-8588] [SQL] Regression test This PR adds regression test for https://issues.apache.org/jira/browse/SPARK-8588 (fixed by https://github.com/apache/spark/commit/457d07eaa023b44b75344110508f629925eb6247). Author: Yin Huai yh...@databricks.com This patch had conflicts when merged, resolved by Committer: Michael Armbrust mich...@databricks.com Closes #7103 from yhuai/SPARK-8588-test and squashes the following commits: eb5f418 [Yin Huai] Add a query test. c61a173 [Yin Huai] Regression test for SPARK-8588. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7b467cc9 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7b467cc9 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7b467cc9 Branch: refs/heads/master Commit: 7b467cc9348fa910e445ad08914a72f8ed4fc249 Parents: 0effe18 Author: Yin Huai yh...@databricks.com Authored: Mon Jul 6 16:26:31 2015 -0700 Committer: Michael Armbrust mich...@databricks.com Committed: Mon Jul 6 16:28:47 2015 -0700 -- .../analysis/HiveTypeCoercionSuite.scala| 21 .../sql/hive/execution/SQLQuerySuite.scala | 16 +++ 2 files changed, 37 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/7b467cc9/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercionSuite.scala -- diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercionSuite.scala index b564266..93db33d 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercionSuite.scala @@ -271,4 +271,25 @@ class HiveTypeCoercionSuite extends PlanTest { Literal(true) ) } + + /** + * There are rules that need to not fire before child expressions get resolved. + * We use this test to make sure those rules do not fire early. 
+ */ + test(make sure rules do not fire early) { +// InConversion +val inConversion = HiveTypeCoercion.InConversion +ruleTest(inConversion, + In(UnresolvedAttribute(a), Seq(Literal(1))), + In(UnresolvedAttribute(a), Seq(Literal(1))) +) +ruleTest(inConversion, + In(Literal(test), Seq(UnresolvedAttribute(a), Literal(1))), + In(Literal(test), Seq(UnresolvedAttribute(a), Literal(1))) +) +ruleTest(inConversion, + In(Literal(a), Seq(Literal(1), Literal(b))), + In(Literal(a), Seq(Cast(Literal(1), StringType), Cast(Literal(b), StringType))) +) + } } http://git-wip-us.apache.org/repos/asf/spark/blob/7b467cc9/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala -- diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index 6d64539..bf9f2ec 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -990,5 +990,21 @@ class SQLQuerySuite extends QueryTest { Timestamp.valueOf(1969-12-31 16:00:00), String.valueOf(1969-12-31 16:00:00), Timestamp.valueOf(1970-01-01 00:00:00))) + + } + + test(SPARK-8588 HiveTypeCoercion.inConversion fires too early) { +val df = + TestHive.createDataFrame(Seq((1, 2014-01-01), (2, 2015-01-01), (3, 2016-01-01))) +df.toDF(id, date).registerTempTable(test_SPARK8588) +checkAnswer( + TestHive.sql( + + |select id, concat(year(date)) + |from test_SPARK8588 where concat(year(date), ' year') in ('2015 year', '2014 year') +.stripMargin), + Row(1, 2014) :: Row(2, 2015) :: Nil +) +TestHive.dropTempTable(test_SPARK8588) } } - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-8072] [SQL] Better AnalysisException for writing DataFrame with identically named columns
Repository: spark Updated Branches: refs/heads/master 7b467cc93 - 09a06418d [SPARK-8072] [SQL] Better AnalysisException for writing DataFrame with identically named columns Adding a function checkConstraints which will check for the constraints to be applied on the dataframe / dataframe schema. Function called before storing the dataframe to an external storage. Function added in the corresponding datasource API. cc rxin marmbrus Author: animesh animesh@apache.spark This patch had conflicts when merged, resolved by Committer: Michael Armbrust mich...@databricks.com Closes #7013 from animeshbaranawal/8072 and squashes the following commits: f70dd0e [animesh] Change IO exception to Analysis Exception fd45e1b [animesh] 8072: Fix Style Issues a8a964f [animesh] 8072: Improving on previous commits 3cc4d2c [animesh] Fix Style Issues 1a89115 [animesh] Fix Style Issues 98b4399 [animesh] 8072 : Moved the exception handling to ResolvedDataSource specific to parquet format 7c3d928 [animesh] 8072: Adding check to DataFrameWriter.scala Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/09a06418 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/09a06418 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/09a06418 Branch: refs/heads/master Commit: 09a06418debc25da0191d98798f7c5016d39be91 Parents: 7b467cc Author: animesh animesh@apache.spark Authored: Mon Jul 6 16:39:49 2015 -0700 Committer: Michael Armbrust mich...@databricks.com Committed: Mon Jul 6 16:39:49 2015 -0700 -- .../apache/spark/sql/json/JSONRelation.scala| 31 .../apache/spark/sql/parquet/newParquet.scala | 19 +++- .../org/apache/spark/sql/DataFrameSuite.scala | 24 +++ 3 files changed, 73 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/09a06418/sql/core/src/main/scala/org/apache/spark/sql/json/JSONRelation.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/json/JSONRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/json/JSONRelation.scala index 69bf13e..2361d3b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/json/JSONRelation.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/json/JSONRelation.scala @@ -22,6 +22,7 @@ import java.io.IOException import org.apache.hadoop.fs.Path import org.apache.spark.rdd.RDD +import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression} import org.apache.spark.sql.sources._ import org.apache.spark.sql.types.StructType @@ -37,6 +38,17 @@ private[sql] class DefaultSource parameters.getOrElse(path, sys.error('path' must be specified for json data.)) } + /** Constraints to be imposed on dataframe to be stored. */ + private def checkConstraints(data: DataFrame): Unit = { +if (data.schema.fieldNames.length != data.schema.fieldNames.distinct.length) { + val duplicateColumns = data.schema.fieldNames.groupBy(identity).collect { +case (x, ys) if ys.length 1 = \ + x + \ + }.mkString(, ) + throw new AnalysisException(sDuplicate column(s) : $duplicateColumns found, + +scannot save to JSON format) +} + } + /** Returns a new base relation with the parameters. 
*/ override def createRelation( sqlContext: SQLContext, @@ -63,6 +75,10 @@ private[sql] class DefaultSource mode: SaveMode, parameters: Map[String, String], data: DataFrame): BaseRelation = { +// check if dataframe satisfies the constraints +// before moving forward +checkConstraints(data) + val path = checkPath(parameters) val filesystemPath = new Path(path) val fs = filesystemPath.getFileSystem(sqlContext.sparkContext.hadoopConfiguration) @@ -130,6 +146,17 @@ private[sql] class JSONRelation( samplingRatio, userSpecifiedSchema)(sqlContext) + /** Constraints to be imposed on dataframe to be stored. */ + private def checkConstraints(data: DataFrame): Unit = { +if (data.schema.fieldNames.length != data.schema.fieldNames.distinct.length) { + val duplicateColumns = data.schema.fieldNames.groupBy(identity).collect { +case (x, ys) if ys.length 1 = \ + x + \ + }.mkString(, ) + throw new AnalysisException(sDuplicate column(s) : $duplicateColumns found, + +scannot save to JSON format) +} + } + private val useJacksonStreamingAPI: Boolean = sqlContext.conf.useJacksonStreamingAPI override val needConversion: Boolean = false @@ -178,6 +205,10 @@ private[sql] class JSONRelation( } override def insert(data: DataFrame, overwrite: Boolean): Unit = { +// check if dataframe satisfies constraints +// before moving
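From the user's side, the added check turns a silently corrupt write into an immediate failure. A hedged sketch of triggering it, with an illustrative output path, assuming a live SQLContext:

```scala
import org.apache.spark.sql.AnalysisException

val dup = sqlContext.createDataFrame(Seq((1, 2))).toDF("a", "a")   // two columns named "a"
try {
  dup.write.format("json").save("/tmp/dup-cols")   // path is illustrative
} catch {
  case e: AnalysisException =>
    println(e.getMessage)   // names the duplicate column(s) and the JSON format
}
```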
spark git commit: [MINOR] [SQL] remove unused code in Exchange
Repository: spark Updated Branches: refs/heads/master 2471c0bf7 - 132e7fca1 [MINOR] [SQL] remove unused code in Exchange Author: Daoyuan Wang daoyuan.w...@intel.com Closes #7234 from adrian-wang/exchangeclean and squashes the following commits: b093ec9 [Daoyuan Wang] remove unused code Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/132e7fca Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/132e7fca Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/132e7fca Branch: refs/heads/master Commit: 132e7fca129be8f00ba429a51bcef60abb2eed6d Parents: 2471c0b Author: Daoyuan Wang daoyuan.w...@intel.com Authored: Mon Jul 6 15:54:43 2015 -0700 Committer: Josh Rosen joshro...@databricks.com Committed: Mon Jul 6 15:54:43 2015 -0700 -- .../org/apache/spark/sql/execution/Exchange.scala | 14 -- 1 file changed, 14 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/132e7fca/sql/core/src/main/scala/org/apache/spark/sql/execution/Exchange.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/Exchange.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/Exchange.scala index edc64a0..e054c1d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/Exchange.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/Exchange.scala @@ -117,20 +117,6 @@ case class Exchange( } } - private val keyOrdering = { -if (newOrdering.nonEmpty) { - val key = newPartitioning.keyExpressions - val boundOrdering = newOrdering.map { o = -val ordinal = key.indexOf(o.child) -if (ordinal == -1) sys.error(sInvalid ordering on $o requested for $newPartitioning) -o.copy(child = BoundReference(ordinal, o.child.dataType, o.child.nullable)) - } - new RowOrdering(boundOrdering) -} else { - null // Ordering will not be used -} - } - @transient private lazy val sparkConf = child.sqlContext.sparkContext.getConf private def getSerializer( - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: Revert [SPARK-7212] [MLLIB] Add sequence learning flag
Repository: spark Updated Branches: refs/heads/master 1165b17d2 - 96c5eeec3 Revert [SPARK-7212] [MLLIB] Add sequence learning flag This reverts commit 25f574eb9a3cb9b93b7d9194a8ec16e00ce2c036. After speaking to some users and developers, we realized that FP-growth doesn't meet the requirement for frequent sequence mining. PrefixSpan (SPARK-6487) would be the correct algorithm for it. feynmanliang Author: Xiangrui Meng m...@databricks.com Closes #7240 from mengxr/SPARK-7212.revert and squashes the following commits: 2b3d66b [Xiangrui Meng] Revert [SPARK-7212] [MLLIB] Add sequence learning flag Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/96c5eeec Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/96c5eeec Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/96c5eeec Branch: refs/heads/master Commit: 96c5eeec3970e8b1ebc6ddf5c97a7acc47f539dc Parents: 1165b17 Author: Xiangrui Meng m...@databricks.com Authored: Mon Jul 6 16:11:22 2015 -0700 Committer: Xiangrui Meng m...@databricks.com Committed: Mon Jul 6 16:11:22 2015 -0700 -- .../org/apache/spark/mllib/fpm/FPGrowth.scala | 38 +++--- .../apache/spark/mllib/fpm/FPGrowthSuite.scala | 52 +--- python/pyspark/mllib/fpm.py | 4 +- 3 files changed, 12 insertions(+), 82 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/96c5eeec/mllib/src/main/scala/org/apache/spark/mllib/fpm/FPGrowth.scala -- diff --git a/mllib/src/main/scala/org/apache/spark/mllib/fpm/FPGrowth.scala b/mllib/src/main/scala/org/apache/spark/mllib/fpm/FPGrowth.scala index abac080..efa8459 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/fpm/FPGrowth.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/fpm/FPGrowth.scala @@ -36,7 +36,7 @@ import org.apache.spark.storage.StorageLevel * :: Experimental :: * * Model trained by [[FPGrowth]], which holds frequent itemsets. - * @param freqItemsets frequent itemsets, which is an RDD of [[FreqItemset]] + * @param freqItemsets frequent itemset, which is an RDD of [[FreqItemset]] * @tparam Item item type */ @Experimental @@ -62,14 +62,13 @@ class FPGrowthModel[Item: ClassTag](val freqItemsets: RDD[FreqItemset[Item]]) ex @Experimental class FPGrowth private ( private var minSupport: Double, -private var numPartitions: Int, -private var ordered: Boolean) extends Logging with Serializable { +private var numPartitions: Int) extends Logging with Serializable { /** * Constructs a default instance with default parameters {minSupport: `0.3`, numPartitions: same - * as the input data, ordered: `false`}. + * as the input data}. */ - def this() = this(0.3, -1, false) + def this() = this(0.3, -1) /** * Sets the minimal support level (default: `0.3`). @@ -88,15 +87,6 @@ class FPGrowth private ( } /** - * Indicates whether to mine itemsets (unordered) or sequences (ordered) (default: false, mine - * itemsets). - */ - def setOrdered(ordered: Boolean): this.type = { -this.ordered = ordered -this - } - - /** * Computes an FP-Growth model that contains frequent itemsets. 
* @param data input data set, each element contains a transaction * @return an [[FPGrowthModel]] @@ -165,7 +155,7 @@ class FPGrowth private ( .flatMap { case (part, tree) = tree.extract(minCount, x = partitioner.getPartition(x) == part) }.map { case (ranks, count) = - new FreqItemset(ranks.map(i = freqItems(i)).reverse.toArray, count, ordered) + new FreqItemset(ranks.map(i = freqItems(i)).toArray, count) } } @@ -181,12 +171,9 @@ class FPGrowth private ( itemToRank: Map[Item, Int], partitioner: Partitioner): mutable.Map[Int, Array[Int]] = { val output = mutable.Map.empty[Int, Array[Int]] -// Filter the basket by frequent items pattern +// Filter the basket by frequent items pattern and sort their ranks. val filtered = transaction.flatMap(itemToRank.get) -if (!this.ordered) { - ju.Arrays.sort(filtered) -} -// Generate conditional transactions +ju.Arrays.sort(filtered) val n = filtered.length var i = n - 1 while (i = 0) { @@ -211,18 +198,9 @@ object FPGrowth { * Frequent itemset. * @param items items in this itemset. Java users should call [[FreqItemset#javaItems]] instead. * @param freq frequency - * @param ordered indicates if items represents an itemset (false) or sequence (true) * @tparam Item item type */ - class FreqItemset[Item](val items: Array[Item], val freq: Long, val ordered: Boolean) -extends Serializable { - -/** - * Auxillary constructor, assumes unordered by default. - */ -def
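After the revert, FPGrowth mines unordered itemsets only, with PrefixSpan (SPARK-6487) slated to cover sequences. A minimal sketch of the restored Scala API on made-up transactions, assuming an active SparkContext `sc`:

```scala
import org.apache.spark.mllib.fpm.FPGrowth

val transactions = sc.parallelize(Seq(
  Array("a", "b", "c"),
  Array("a", "b"),
  Array("b", "c")
))

val model = new FPGrowth()
  .setMinSupport(0.5)
  .setNumPartitions(2)
  .run(transactions)

// Print each frequent itemset with its frequency.
model.freqItemsets.collect().foreach { itemset =>
  println(itemset.items.mkString("[", ",", "]") + " : " + itemset.freq)
}
```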
spark git commit: [SPARK-8819] Fix build for maven 3.3.x
Repository: spark Updated Branches: refs/heads/branch-1.4 4d813833d - 947b84598 [SPARK-8819] Fix build for maven 3.3.x This is a workaround for MSHADE-148, which leads to an infinite loop when building Spark with maven 3.3.x. This was originally caused by #6441, which added a bunch of test dependencies on the spark-core test module. Recently, it was revealed by #7193. This patch adds a `-Prelease` profile. If present, it will set `createDependencyReducedPom` to true. The consequences are: - If you are releasing Spark with this profile, you are fine as long as you use maven 3.2.x or before. - If you are releasing Spark without this profile, you will run into SPARK-8781. - If you are not releasing Spark but you are using this profile, you may run into SPARK-8819. - If you are not releasing Spark and you did not include this profile, you are fine. This is all documented in `pom.xml` and tested locally with both versions of maven. Author: Andrew Or and...@databricks.com Closes #7219 from andrewor14/fix-maven-build and squashes the following commits: 1d37e87 [Andrew Or] Merge branch 'master' of github.com:apache/spark into fix-maven-build 3574ae4 [Andrew Or] Review comments f39199c [Andrew Or] Create a -Prelease profile that flags `createDependencyReducedPom` (cherry picked from commit 9eae5fa642317dd11fc783d832d4cbb7e62db471) Signed-off-by: Andrew Or and...@databricks.com Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/947b8459 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/947b8459 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/947b8459 Branch: refs/heads/branch-1.4 Commit: 947b845988a48dfd1fbdda74dd7fe5a537e12f8c Parents: 4d81383 Author: Andrew Or and...@databricks.com Authored: Mon Jul 6 19:22:30 2015 -0700 Committer: Andrew Or and...@databricks.com Committed: Mon Jul 6 19:22:38 2015 -0700 -- dev/create-release/create-release.sh | 4 ++-- pom.xml | 24 2 files changed, 26 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/947b8459/dev/create-release/create-release.sh -- diff --git a/dev/create-release/create-release.sh b/dev/create-release/create-release.sh index 54274a8..cfe2cd4 100755 --- a/dev/create-release/create-release.sh +++ b/dev/create-release/create-release.sh @@ -118,13 +118,13 @@ if [[ ! $@ =~ --skip-publish ]]; then rm -rf $SPARK_REPO - build/mvn -DskipTests -Pyarn -Phive \ + build/mvn -DskipTests -Pyarn -Phive -Prelease-profile\ -Phive-thriftserver -Phadoop-2.2 -Pspark-ganglia-lgpl -Pkinesis-asl \ clean install ./dev/change-version-to-2.11.sh - build/mvn -DskipTests -Pyarn -Phive \ + build/mvn -DskipTests -Pyarn -Phive -Prelease-profile\ -Dscala-2.11 -Phadoop-2.2 -Pspark-ganglia-lgpl -Pkinesis-asl \ clean install http://git-wip-us.apache.org/repos/asf/spark/blob/947b8459/pom.xml -- diff --git a/pom.xml b/pom.xml index 75b27f9..e1b3e87 100644 --- a/pom.xml +++ b/pom.xml @@ -162,6 +162,8 @@ fasterxml.jackson.version2.4.4/fasterxml.jackson.version snappy.version1.1.1.7/snappy.version netlib.java.version1.1.2/netlib.java.version +!-- For maven shade plugin (see SPARK-8819) -- +create.dependency.reduced.pomfalse/create.dependency.reduced.pom test.java.home${java.home}/test.java.home @@ -1428,6 +1430,8 @@ version2.3/version configuration shadedArtifactAttachedfalse/shadedArtifactAttached + !-- Work around MSHADE-148. See SPARK-8819. 
-- + createDependencyReducedPom${create.dependency.reduced.pom}/createDependencyReducedPom artifactSet includes !-- At a minimum we must include this to force effective pom generation -- @@ -1812,6 +1816,26 @@ /properties /profile +profile + !-- + Use this profile only for making Spark releases. Note that due to SPARK-8819, + you must use maven version 3.2.x or before to avoid running into MSHADE-148. + -- + idrelease-profile/id + properties +!-- +The maven shade plugin has a bug where enabling the `createDependencyReducedPom` +property causes maven to go into an infinite loop (MSHADE-148). This is only an +issue for the Spark build if the maven version is 3.3.x or newer (SPARK-8819). + +However, since disabling this property has the side effect of not resolving +variables in the released pom files (SPARK-8781), we need to enable this during +releases. +-- +
spark git commit: [SPARK-8819] Fix build for maven 3.3.x
Repository: spark Updated Branches: refs/heads/branch-1.3 502e1fd68 - 5f1d1c0b8 [SPARK-8819] Fix build for maven 3.3.x This is a workaround for MSHADE-148, which leads to an infinite loop when building Spark with maven 3.3.x. This was originally caused by #6441, which added a bunch of test dependencies on the spark-core test module. Recently, it was revealed by #7193. This patch adds a `-Prelease` profile. If present, it will set `createDependencyReducedPom` to true. The consequences are: - If you are releasing Spark with this profile, you are fine as long as you use maven 3.2.x or before. - If you are releasing Spark without this profile, you will run into SPARK-8781. - If you are not releasing Spark but you are using this profile, you may run into SPARK-8819. - If you are not releasing Spark and you did not include this profile, you are fine. This is all documented in `pom.xml` and tested locally with both versions of maven. Author: Andrew Or and...@databricks.com Closes #7219 from andrewor14/fix-maven-build and squashes the following commits: 1d37e87 [Andrew Or] Merge branch 'master' of github.com:apache/spark into fix-maven-build 3574ae4 [Andrew Or] Review comments f39199c [Andrew Or] Create a -Prelease profile that flags `createDependencyReducedPom` Conflicts: dev/create-release/create-release.sh pom.xml Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5f1d1c0b Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5f1d1c0b Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5f1d1c0b Branch: refs/heads/branch-1.3 Commit: 5f1d1c0b826347e5518bdb079192f62f5cd4767b Parents: 502e1fd Author: Andrew Or and...@databricks.com Authored: Mon Jul 6 19:22:30 2015 -0700 Committer: Andrew Or and...@databricks.com Committed: Mon Jul 6 19:25:36 2015 -0700 -- dev/create-release/create-release.sh | 4 ++-- pom.xml | 23 +++ 2 files changed, 25 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/5f1d1c0b/dev/create-release/create-release.sh -- diff --git a/dev/create-release/create-release.sh b/dev/create-release/create-release.sh index 0403594..0979d5e 100755 --- a/dev/create-release/create-release.sh +++ b/dev/create-release/create-release.sh @@ -117,13 +117,13 @@ if [[ ! $@ =~ --skip-publish ]]; then echo Created Nexus staging repository: $staged_repo_id build/mvn -DskipTests -Dhadoop.version=2.2.0 -Dyarn.version=2.2.0 \ --Pyarn -Phive -Phive-thriftserver -Phadoop-2.2 -Pspark-ganglia-lgpl -Pkinesis-asl \ +-Pyarn -Phive -Phive-thriftserver -Phadoop-2.2 -Pspark-ganglia-lgpl -Pkinesis-asl -Prelease-profile \ clean install ./dev/change-version-to-2.11.sh build/mvn -DskipTests -Dhadoop.version=2.2.0 -Dyarn.version=2.2.0 \ --Dscala-2.11 -Pyarn -Phive -Phadoop-2.2 -Pspark-ganglia-lgpl -Pkinesis-asl \ +-Dscala-2.11 -Pyarn -Phive -Phadoop-2.2 -Pspark-ganglia-lgpl -Pkinesis-asl -Prelease-profile \ clean install ./dev/change-version-to-2.10.sh http://git-wip-us.apache.org/repos/asf/spark/blob/5f1d1c0b/pom.xml -- diff --git a/pom.xml b/pom.xml index b0376ac..bcc2f51 100644 --- a/pom.xml +++ b/pom.xml @@ -156,6 +156,7 @@ codehaus.jackson.version1.8.8/codehaus.jackson.version fasterxml.jackson.version2.4.4/fasterxml.jackson.version snappy.version1.1.1.7/snappy.version +create.dependency.reduced.pomfalse/create.dependency.reduced.pom !-- Dependency scopes that can be overridden by enabling certain profiles. 
These profiles are @@ -1367,6 +1368,8 @@ version2.2/version configuration shadedArtifactAttachedfalse/shadedArtifactAttached + !-- Work around MSHADE-148. See SPARK-8819. -- + createDependencyReducedPom${create.dependency.reduced.pom}/createDependencyReducedPom artifactSet includes !-- At a minimum we must include this to force effective pom generation -- @@ -1748,6 +1751,26 @@ /properties /profile +profile + !-- + Use this profile only for making Spark releases. Note that due to SPARK-8819, + you must use maven version 3.2.x or before to avoid running into MSHADE-148. + -- + idrelease-profile/id + properties +!-- +The maven shade plugin has a bug where enabling the `createDependencyReducedPom` +property causes maven to go into an infinite loop (MSHADE-148). This is only an +issue for the Spark build if the maven version is 3.3.x or newer (SPARK-8819). + +However, since
spark git commit: Revert [SPARK-8781] Fix variables in published pom.xml are not resolved
Repository: spark Updated Branches: refs/heads/master 9eae5fa64 - 929dfa24b Revert [SPARK-8781] Fix variables in published pom.xml are not resolved This reverts commit 82cf3315e690f4ac15b50edea6a3d673aa5be4c0. Conflicts: pom.xml Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/929dfa24 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/929dfa24 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/929dfa24 Branch: refs/heads/master Commit: 929dfa24b13b8d1bd08aa8a3b256766a9765f985 Parents: 9eae5fa Author: Andrew Or and...@databricks.com Authored: Mon Jul 6 19:27:04 2015 -0700 Committer: Andrew Or and...@databricks.com Committed: Mon Jul 6 19:27:04 2015 -0700 -- extras/kinesis-asl/pom.xml | 7 +++ .../kinesis/KinesisReceiverSuite.scala | 20 2 files changed, 15 insertions(+), 12 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/929dfa24/extras/kinesis-asl/pom.xml -- diff --git a/extras/kinesis-asl/pom.xml b/extras/kinesis-asl/pom.xml index 5289073..c242e7a 100644 --- a/extras/kinesis-asl/pom.xml +++ b/extras/kinesis-asl/pom.xml @@ -42,6 +42,13 @@ /dependency dependency groupIdorg.apache.spark/groupId + artifactIdspark-core_${scala.binary.version}/artifactId + version${project.version}/version + typetest-jar/type + scopetest/scope +/dependency +dependency + groupIdorg.apache.spark/groupId artifactIdspark-streaming_${scala.binary.version}/artifactId version${project.version}/version typetest-jar/type http://git-wip-us.apache.org/repos/asf/spark/blob/929dfa24/extras/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisReceiverSuite.scala -- diff --git a/extras/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisReceiverSuite.scala b/extras/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisReceiverSuite.scala index 6c26262..2103dca 100644 --- a/extras/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisReceiverSuite.scala +++ b/extras/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisReceiverSuite.scala @@ -26,23 +26,18 @@ import com.amazonaws.services.kinesis.clientlibrary.lib.worker.InitialPositionIn import com.amazonaws.services.kinesis.clientlibrary.types.ShutdownReason import com.amazonaws.services.kinesis.model.Record import org.mockito.Mockito._ -// scalastyle:off -// To avoid introducing a dependency on Spark core tests, simply use scalatest's FunSuite -// here instead of our own SparkFunSuite. Introducing the dependency has caused problems -// in the past (SPARK-8781) that are complicated by bugs in the maven shade plugin (MSHADE-148). 
-import org.scalatest.{BeforeAndAfter, FunSuite, Matchers} +import org.scalatest.{BeforeAndAfter, Matchers} import org.scalatest.mock.MockitoSugar import org.apache.spark.storage.StorageLevel -import org.apache.spark.streaming.{Milliseconds, Seconds, StreamingContext} +import org.apache.spark.streaming.{Milliseconds, Seconds, StreamingContext, TestSuiteBase} import org.apache.spark.util.{Clock, ManualClock, Utils} /** * Suite of Kinesis streaming receiver tests focusing mostly on the KinesisRecordProcessor */ -class KinesisReceiverSuite extends FunSuite with Matchers with BeforeAndAfter - with MockitoSugar { -// scalastyle:on +class KinesisReceiverSuite extends TestSuiteBase with Matchers with BeforeAndAfter +with MockitoSugar { val app = TestKinesisReceiver val stream = mySparkStream @@ -62,7 +57,7 @@ class KinesisReceiverSuite extends FunSuite with Matchers with BeforeAndAfter var checkpointStateMock: KinesisCheckpointState = _ var currentClockMock: Clock = _ - before { + override def beforeFunction(): Unit = { receiverMock = mock[KinesisReceiver] checkpointerMock = mock[IRecordProcessorCheckpointer] checkpointClockMock = mock[ManualClock] @@ -70,7 +65,8 @@ class KinesisReceiverSuite extends FunSuite with Matchers with BeforeAndAfter currentClockMock = mock[Clock] } - after { + override def afterFunction(): Unit = { +super.afterFunction() // Since this suite was originally written using EasyMock, add this to preserve the old // mocking semantics (see SPARK-5735 for more details) verifyNoMoreInteractions(receiverMock, checkpointerMock, checkpointClockMock, @@ -78,7 +74,7 @@ class KinesisReceiverSuite extends FunSuite with Matchers with BeforeAndAfter } test(KinesisUtils API) { -val ssc = new StreamingContext(local[2], getClass.getSimpleName, Seconds(1)) +val ssc =
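For reference, the suite shape this revert restores: TestSuiteBase replaces ScalaTest's before { ... } and after { ... } blocks with overridable beforeFunction/afterFunction hooks, and the Mockito verification in afterFunction preserves the suite's old EasyMock-style semantics (SPARK-5735). A minimal sketch under those assumptions — the class name and mock field are illustrative, not copied from the full suite:

```
package org.apache.spark.streaming.kinesis

import org.mockito.Mockito._
import org.scalatest.Matchers
import org.scalatest.mock.MockitoSugar

import org.apache.spark.streaming.TestSuiteBase

// Sketch only: KinesisReceiver is package-private, so a real suite lives in
// this package, as the one in the diff above does.
class ExampleKinesisSuite extends TestSuiteBase with Matchers with MockitoSugar {

  var receiverMock: KinesisReceiver = _

  // TestSuiteBase hook replacing ScalaTest's `before { ... }` block
  override def beforeFunction(): Unit = {
    receiverMock = mock[KinesisReceiver]
  }

  // TestSuiteBase hook replacing ScalaTest's `after { ... }` block
  override def afterFunction(): Unit = {
    super.afterFunction()
    // Preserve the old EasyMock-style mocking semantics (see SPARK-5735)
    verifyNoMoreInteractions(receiverMock)
  }
}
```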
[2/2] spark git commit: Preparing development version 1.4.2-SNAPSHOT
Preparing development version 1.4.2-SNAPSHOT Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5c080c2e Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5c080c2e Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5c080c2e Branch: refs/heads/branch-1.4 Commit: 5c080c2ee498a97a8aaba8f96bfa17c7444f6bb6 Parents: f8aab7a Author: Patrick Wendell pwend...@gmail.com Authored: Mon Jul 6 19:39:43 2015 -0700 Committer: Patrick Wendell pwend...@gmail.com Committed: Mon Jul 6 19:39:43 2015 -0700 -- assembly/pom.xml | 2 +- bagel/pom.xml | 2 +- core/pom.xml | 2 +- examples/pom.xml | 2 +- external/flume-sink/pom.xml | 2 +- external/flume/pom.xml| 2 +- external/kafka-assembly/pom.xml | 2 +- external/kafka/pom.xml| 2 +- external/mqtt/pom.xml | 2 +- external/twitter/pom.xml | 2 +- external/zeromq/pom.xml | 2 +- extras/java8-tests/pom.xml| 2 +- extras/kinesis-asl/pom.xml| 2 +- extras/spark-ganglia-lgpl/pom.xml | 2 +- graphx/pom.xml| 2 +- launcher/pom.xml | 2 +- mllib/pom.xml | 2 +- network/common/pom.xml| 2 +- network/shuffle/pom.xml | 2 +- network/yarn/pom.xml | 2 +- pom.xml | 2 +- repl/pom.xml | 2 +- sql/catalyst/pom.xml | 2 +- sql/core/pom.xml | 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml | 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- unsafe/pom.xml| 2 +- yarn/pom.xml | 2 +- 30 files changed, 30 insertions(+), 30 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/5c080c2e/assembly/pom.xml -- diff --git a/assembly/pom.xml b/assembly/pom.xml index ba233e7..228db59 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ parent groupIdorg.apache.spark/groupId artifactIdspark-parent_2.10/artifactId -version1.4.1/version +version1.4.2-SNAPSHOT/version relativePath../pom.xml/relativePath /parent http://git-wip-us.apache.org/repos/asf/spark/blob/5c080c2e/bagel/pom.xml -- diff --git a/bagel/pom.xml b/bagel/pom.xml index c5e9183..ce791a6 100644 --- a/bagel/pom.xml +++ b/bagel/pom.xml @@ -21,7 +21,7 @@ parent groupIdorg.apache.spark/groupId artifactIdspark-parent_2.10/artifactId -version1.4.1/version +version1.4.2-SNAPSHOT/version relativePath../pom.xml/relativePath /parent http://git-wip-us.apache.org/repos/asf/spark/blob/5c080c2e/core/pom.xml -- diff --git a/core/pom.xml b/core/pom.xml index f0d236d..176ea9b 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -21,7 +21,7 @@ parent groupIdorg.apache.spark/groupId artifactIdspark-parent_2.10/artifactId -version1.4.1/version +version1.4.2-SNAPSHOT/version relativePath../pom.xml/relativePath /parent http://git-wip-us.apache.org/repos/asf/spark/blob/5c080c2e/examples/pom.xml -- diff --git a/examples/pom.xml b/examples/pom.xml index e9a9cc2..877c2fb 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -21,7 +21,7 @@ parent groupIdorg.apache.spark/groupId artifactIdspark-parent_2.10/artifactId -version1.4.1/version +version1.4.2-SNAPSHOT/version relativePath../pom.xml/relativePath /parent http://git-wip-us.apache.org/repos/asf/spark/blob/5c080c2e/external/flume-sink/pom.xml -- diff --git a/external/flume-sink/pom.xml b/external/flume-sink/pom.xml index 7eae7a7..ad431fa 100644 --- a/external/flume-sink/pom.xml +++ b/external/flume-sink/pom.xml @@ -21,7 +21,7 @@ parent groupIdorg.apache.spark/groupId artifactIdspark-parent_2.10/artifactId -version1.4.1/version +version1.4.2-SNAPSHOT/version relativePath../../pom.xml/relativePath /parent http://git-wip-us.apache.org/repos/asf/spark/blob/5c080c2e/external/flume/pom.xml -- diff --git 
a/external/flume/pom.xml b/external/flume/pom.xml index b3ad09a..9789435 100644 --- a/external/flume/pom.xml +++ b/external/flume/pom.xml @@ -21,7 +21,7 @@ parent groupIdorg.apache.spark/groupId artifactIdspark-parent_2.10/artifactId -version1.4.1/version +
Git Push Summary
Repository: spark Updated Tags: refs/tags/v1.4.1-rc3 [created] f8aab7a7b
[1/2] spark git commit: Preparing Spark release v1.4.1-rc3
Repository: spark Updated Branches: refs/heads/branch-1.4 997444c78 - 5c080c2ee Preparing Spark release v1.4.1-rc3 Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f8aab7a7 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f8aab7a7 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f8aab7a7 Branch: refs/heads/branch-1.4 Commit: f8aab7a7bc5b07979eacff6a0566576dce4c9d0e Parents: 997444c Author: Patrick Wendell pwend...@gmail.com Authored: Mon Jul 6 19:39:37 2015 -0700 Committer: Patrick Wendell pwend...@gmail.com Committed: Mon Jul 6 19:39:37 2015 -0700 -- assembly/pom.xml | 2 +- bagel/pom.xml | 2 +- core/pom.xml | 2 +- examples/pom.xml | 2 +- external/flume-sink/pom.xml | 2 +- external/flume/pom.xml| 2 +- external/kafka-assembly/pom.xml | 2 +- external/kafka/pom.xml| 2 +- external/mqtt/pom.xml | 2 +- external/twitter/pom.xml | 2 +- external/zeromq/pom.xml | 2 +- extras/java8-tests/pom.xml| 2 +- extras/kinesis-asl/pom.xml| 2 +- extras/spark-ganglia-lgpl/pom.xml | 2 +- graphx/pom.xml| 2 +- launcher/pom.xml | 2 +- mllib/pom.xml | 2 +- network/common/pom.xml| 2 +- network/shuffle/pom.xml | 2 +- network/yarn/pom.xml | 2 +- pom.xml | 2 +- repl/pom.xml | 2 +- sql/catalyst/pom.xml | 2 +- sql/core/pom.xml | 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml | 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- unsafe/pom.xml| 2 +- yarn/pom.xml | 2 +- 30 files changed, 30 insertions(+), 30 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/f8aab7a7/assembly/pom.xml -- diff --git a/assembly/pom.xml b/assembly/pom.xml index 228db59..ba233e7 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ parent groupIdorg.apache.spark/groupId artifactIdspark-parent_2.10/artifactId -version1.4.2-SNAPSHOT/version +version1.4.1/version relativePath../pom.xml/relativePath /parent http://git-wip-us.apache.org/repos/asf/spark/blob/f8aab7a7/bagel/pom.xml -- diff --git a/bagel/pom.xml b/bagel/pom.xml index ce791a6..c5e9183 100644 --- a/bagel/pom.xml +++ b/bagel/pom.xml @@ -21,7 +21,7 @@ parent groupIdorg.apache.spark/groupId artifactIdspark-parent_2.10/artifactId -version1.4.2-SNAPSHOT/version +version1.4.1/version relativePath../pom.xml/relativePath /parent http://git-wip-us.apache.org/repos/asf/spark/blob/f8aab7a7/core/pom.xml -- diff --git a/core/pom.xml b/core/pom.xml index 176ea9b..f0d236d 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -21,7 +21,7 @@ parent groupIdorg.apache.spark/groupId artifactIdspark-parent_2.10/artifactId -version1.4.2-SNAPSHOT/version +version1.4.1/version relativePath../pom.xml/relativePath /parent http://git-wip-us.apache.org/repos/asf/spark/blob/f8aab7a7/examples/pom.xml -- diff --git a/examples/pom.xml b/examples/pom.xml index 877c2fb..e9a9cc2 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -21,7 +21,7 @@ parent groupIdorg.apache.spark/groupId artifactIdspark-parent_2.10/artifactId -version1.4.2-SNAPSHOT/version +version1.4.1/version relativePath../pom.xml/relativePath /parent http://git-wip-us.apache.org/repos/asf/spark/blob/f8aab7a7/external/flume-sink/pom.xml -- diff --git a/external/flume-sink/pom.xml b/external/flume-sink/pom.xml index ad431fa..7eae7a7 100644 --- a/external/flume-sink/pom.xml +++ b/external/flume-sink/pom.xml @@ -21,7 +21,7 @@ parent groupIdorg.apache.spark/groupId artifactIdspark-parent_2.10/artifactId -version1.4.2-SNAPSHOT/version +version1.4.1/version relativePath../../pom.xml/relativePath /parent 
http://git-wip-us.apache.org/repos/asf/spark/blob/f8aab7a7/external/flume/pom.xml -- diff --git a/external/flume/pom.xml b/external/flume/pom.xml index 9789435..b3ad09a 100644 --- a/external/flume/pom.xml +++ b/external/flume/pom.xml @@ -21,7 +21,7 @@ parent groupIdorg.apache.spark/groupId
spark git commit: Revert [SPARK-8781] Fix variables in published pom.xml are not resolved
Repository: spark Updated Branches: refs/heads/branch-1.3 5f1d1c0b8 - 960aec976 Revert [SPARK-8781] Fix variables in published pom.xml are not resolved This reverts commit 502e1fd68f9efc0311062146fa058dec3ef0e70b. Conflicts: pom.xml Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/960aec97 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/960aec97 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/960aec97 Branch: refs/heads/branch-1.3 Commit: 960aec976581f233bc93a5c60907975b3c9dcf38 Parents: 5f1d1c0 Author: Andrew Or and...@databricks.com Authored: Mon Jul 6 19:28:45 2015 -0700 Committer: Andrew Or and...@databricks.com Committed: Mon Jul 6 19:28:45 2015 -0700 -- extras/kinesis-asl/pom.xml | 7 .../kinesis/KinesisReceiverSuite.scala | 37 ++-- 2 files changed, 26 insertions(+), 18 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/960aec97/extras/kinesis-asl/pom.xml -- diff --git a/extras/kinesis-asl/pom.xml b/extras/kinesis-asl/pom.xml index 3ac273d..4eea37d 100644 --- a/extras/kinesis-asl/pom.xml +++ b/extras/kinesis-asl/pom.xml @@ -42,6 +42,13 @@ /dependency dependency groupIdorg.apache.spark/groupId + artifactIdspark-core_${scala.binary.version}/artifactId + version${project.version}/version + typetest-jar/type + scopetest/scope +/dependency +dependency + groupIdorg.apache.spark/groupId artifactIdspark-streaming_${scala.binary.version}/artifactId version${project.version}/version typetest-jar/type http://git-wip-us.apache.org/repos/asf/spark/blob/960aec97/extras/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisReceiverSuite.scala -- diff --git a/extras/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisReceiverSuite.scala b/extras/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisReceiverSuite.scala index c098294..255fe65 100644 --- a/extras/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisReceiverSuite.scala +++ b/extras/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisReceiverSuite.scala @@ -20,6 +20,18 @@ import java.nio.ByteBuffer import scala.collection.JavaConversions.seqAsJavaList +import org.apache.spark.storage.StorageLevel +import org.apache.spark.streaming.Milliseconds +import org.apache.spark.streaming.Seconds +import org.apache.spark.streaming.StreamingContext +import org.apache.spark.streaming.TestSuiteBase +import org.apache.spark.util.{ManualClock, Clock} + +import org.mockito.Mockito._ +import org.scalatest.BeforeAndAfter +import org.scalatest.Matchers +import org.scalatest.mock.MockitoSugar + import com.amazonaws.services.kinesis.clientlibrary.exceptions.InvalidStateException import com.amazonaws.services.kinesis.clientlibrary.exceptions.KinesisClientLibDependencyException import com.amazonaws.services.kinesis.clientlibrary.exceptions.ShutdownException @@ -28,24 +40,12 @@ import com.amazonaws.services.kinesis.clientlibrary.interfaces.IRecordProcessorC import com.amazonaws.services.kinesis.clientlibrary.lib.worker.InitialPositionInStream import com.amazonaws.services.kinesis.clientlibrary.types.ShutdownReason import com.amazonaws.services.kinesis.model.Record -import org.mockito.Mockito._ -// scalastyle:off -// To avoid introducing a dependency on Spark core tests, simply use scalatest's FunSuite -// here instead of our own SparkFunSuite. Introducing the dependency has caused problems -// in the past (SPARK-8781) that are complicated by bugs in the maven shade plugin (MSHADE-148). 
-import org.scalatest.{BeforeAndAfter, FunSuite, Matchers} -import org.scalatest.mock.MockitoSugar - -import org.apache.spark.storage.StorageLevel -import org.apache.spark.streaming.{Milliseconds, Seconds, StreamingContext} -import org.apache.spark.util.{Clock, ManualClock} /** * Suite of Kinesis streaming receiver tests focusing mostly on the KinesisRecordProcessor */ -class KinesisReceiverSuite extends FunSuite with Matchers with BeforeAndAfter - with MockitoSugar { -// scalastyle:on +class KinesisReceiverSuite extends TestSuiteBase with Matchers with BeforeAndAfter +with MockitoSugar { val app = TestKinesisReceiver val stream = mySparkStream @@ -65,7 +65,7 @@ class KinesisReceiverSuite extends FunSuite with Matchers with BeforeAndAfter var checkpointStateMock: KinesisCheckpointState = _ var currentClockMock: Clock = _ - before { + override def beforeFunction() = { receiverMock = mock[KinesisReceiver] checkpointerMock =
spark git commit: Revert [SPARK-8781] Fix variables in published pom.xml are not resolved
Repository: spark Updated Branches: refs/heads/branch-1.4 947b84598 - 997444c78 Revert [SPARK-8781] Fix variables in published pom.xml are not resolved This reverts commit 82cf3315e690f4ac15b50edea6a3d673aa5be4c0. Conflicts: pom.xml Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/997444c7 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/997444c7 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/997444c7 Branch: refs/heads/branch-1.4 Commit: 997444c7875f9079afcebd41878bdf5e071451f6 Parents: 947b845 Author: Andrew Or and...@databricks.com Authored: Mon Jul 6 19:27:04 2015 -0700 Committer: Andrew Or and...@databricks.com Committed: Mon Jul 6 19:27:55 2015 -0700 -- extras/kinesis-asl/pom.xml | 7 +++ .../kinesis/KinesisReceiverSuite.scala | 20 2 files changed, 15 insertions(+), 12 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/997444c7/extras/kinesis-asl/pom.xml -- diff --git a/extras/kinesis-asl/pom.xml b/extras/kinesis-asl/pom.xml index 7ab64da..8f75931 100644 --- a/extras/kinesis-asl/pom.xml +++ b/extras/kinesis-asl/pom.xml @@ -42,6 +42,13 @@ /dependency dependency groupIdorg.apache.spark/groupId + artifactIdspark-core_${scala.binary.version}/artifactId + version${project.version}/version + typetest-jar/type + scopetest/scope +/dependency +dependency + groupIdorg.apache.spark/groupId artifactIdspark-streaming_${scala.binary.version}/artifactId version${project.version}/version typetest-jar/type http://git-wip-us.apache.org/repos/asf/spark/blob/997444c7/extras/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisReceiverSuite.scala -- diff --git a/extras/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisReceiverSuite.scala b/extras/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisReceiverSuite.scala index 6c26262..2103dca 100644 --- a/extras/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisReceiverSuite.scala +++ b/extras/kinesis-asl/src/test/scala/org/apache/spark/streaming/kinesis/KinesisReceiverSuite.scala @@ -26,23 +26,18 @@ import com.amazonaws.services.kinesis.clientlibrary.lib.worker.InitialPositionIn import com.amazonaws.services.kinesis.clientlibrary.types.ShutdownReason import com.amazonaws.services.kinesis.model.Record import org.mockito.Mockito._ -// scalastyle:off -// To avoid introducing a dependency on Spark core tests, simply use scalatest's FunSuite -// here instead of our own SparkFunSuite. Introducing the dependency has caused problems -// in the past (SPARK-8781) that are complicated by bugs in the maven shade plugin (MSHADE-148). 
-import org.scalatest.{BeforeAndAfter, FunSuite, Matchers} +import org.scalatest.{BeforeAndAfter, Matchers} import org.scalatest.mock.MockitoSugar import org.apache.spark.storage.StorageLevel -import org.apache.spark.streaming.{Milliseconds, Seconds, StreamingContext} +import org.apache.spark.streaming.{Milliseconds, Seconds, StreamingContext, TestSuiteBase} import org.apache.spark.util.{Clock, ManualClock, Utils} /** * Suite of Kinesis streaming receiver tests focusing mostly on the KinesisRecordProcessor */ -class KinesisReceiverSuite extends FunSuite with Matchers with BeforeAndAfter - with MockitoSugar { -// scalastyle:on +class KinesisReceiverSuite extends TestSuiteBase with Matchers with BeforeAndAfter +with MockitoSugar { val app = TestKinesisReceiver val stream = mySparkStream @@ -62,7 +57,7 @@ class KinesisReceiverSuite extends FunSuite with Matchers with BeforeAndAfter var checkpointStateMock: KinesisCheckpointState = _ var currentClockMock: Clock = _ - before { + override def beforeFunction(): Unit = { receiverMock = mock[KinesisReceiver] checkpointerMock = mock[IRecordProcessorCheckpointer] checkpointClockMock = mock[ManualClock] @@ -70,7 +65,8 @@ class KinesisReceiverSuite extends FunSuite with Matchers with BeforeAndAfter currentClockMock = mock[Clock] } - after { + override def afterFunction(): Unit = { +super.afterFunction() // Since this suite was originally written using EasyMock, add this to preserve the old // mocking semantics (see SPARK-5735 for more details) verifyNoMoreInteractions(receiverMock, checkpointerMock, checkpointClockMock, @@ -78,7 +74,7 @@ class KinesisReceiverSuite extends FunSuite with Matchers with BeforeAndAfter } test(KinesisUtils API) { -val ssc = new StreamingContext(local[2], getClass.getSimpleName, Seconds(1)) +
spark git commit: [SPARK-6747] [SQL] Throw an AnalysisException when unsupported Java list types used in Hive UDF
Repository: spark Updated Branches: refs/heads/master 929dfa24b - 1821fc165 [SPARK-6747] [SQL] Throw an AnalysisException when unsupported Java list types used in Hive UDF The current implementation can't handle List as a return type in a Hive UDF and throws a meaningless MatchError. Assume a UDF like the one below; public class UDFToListString extends UDF { public ListString evaluate(Object o) { return Arrays.asList(xxx, yyy, zzz); } } A scala.MatchError is thrown as follows when the UDF is used: scala.MatchError: interface java.util.List (of class java.lang.Class) at org.apache.spark.sql.hive.HiveInspectors$class.javaClassToDataType(HiveInspectors.scala:174) at org.apache.spark.sql.hive.HiveSimpleUdf.javaClassToDataType(hiveUdfs.scala:76) at org.apache.spark.sql.hive.HiveSimpleUdf.dataType$lzycompute(hiveUdfs.scala:106) at org.apache.spark.sql.hive.HiveSimpleUdf.dataType(hiveUdfs.scala:106) at org.apache.spark.sql.catalyst.expressions.Alias.toAttribute(namedExpressions.scala:131) at org.apache.spark.sql.catalyst.planning.PhysicalOperation$$anonfun$collectAliases$1.applyOrElse(patterns.scala:95) at org.apache.spark.sql.catalyst.planning.PhysicalOperation$$anonfun$collectAliases$1.applyOrElse(patterns.scala:94) at scala.runtime.AbstractPartialFunction.apply(AbstractPartialFunction.scala:33) at scala.collection.TraversableLike$$anonfun$collect$1.apply(TraversableLike.scala:278) ... To help UDF developers understand the problem, we need to throw a more suitable exception. Author: Takeshi YAMAMURO linguin@gmail.com Closes #7248 from maropu/FixBugInHiveInspectors and squashes the following commits: 1c3df2a [Takeshi YAMAMURO] Fix comments 56305de [Takeshi YAMAMURO] Fix conflicts 92ed7a6 [Takeshi YAMAMURO] Throw an exception when java list type used 2844a8e [Takeshi YAMAMURO] Apply comments 7114a47 [Takeshi YAMAMURO] Add TODO comments in UDFToListString of HiveUdfSuite fdb2ae4 [Takeshi YAMAMURO] Add StringToUtf8 to convert String into UTF8String af61f2e [Takeshi YAMAMURO] Remove a new type 7f812fd [Takeshi YAMAMURO] Fix code-style errors 6984bf4 [Takeshi YAMAMURO] Apply review comments 93e3d4e [Takeshi YAMAMURO] Add a blank line at the end of UDFToListString ee232db [Takeshi YAMAMURO] Support List as a return type in Hive UDF 1e82316 [Takeshi YAMAMURO] Apply comments 21e8763 [Takeshi YAMAMURO] Add TODO comments in UDFToListString of HiveUdfSuite a488712 [Takeshi YAMAMURO] Add StringToUtf8 to convert String into UTF8String 1c7b9d1 [Takeshi YAMAMURO] Remove a new type f965c34 [Takeshi YAMAMURO] Fix code-style errors 9406416 [Takeshi YAMAMURO] Apply review comments e21ce7e [Takeshi YAMAMURO] Add a blank line at the end of UDFToListString e553f10 [Takeshi YAMAMURO] Support List as a return type in Hive UDF Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1821fc16 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1821fc16 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1821fc16 Branch: refs/heads/master Commit: 1821fc165808143e98b3d9626141b1a55bde90ac Parents: 929dfa2 Author: Takeshi YAMAMURO linguin@gmail.com Authored: Mon Jul 6 19:44:31 2015 -0700 Committer: Michael Armbrust mich...@databricks.com Committed: Mon Jul 6 19:44:31 2015 -0700 -- .../apache/spark/sql/hive/HiveInspectors.scala | 10 +- .../spark/sql/hive/execution/UDFToListInt.java | 29 ++ .../sql/hive/execution/UDFToListString.java | 29 ++ .../spark/sql/hive/execution/HiveUDFSuite.scala | 32 +++- 4 files changed, 98 insertions(+), 2 deletions(-) --
http://git-wip-us.apache.org/repos/asf/spark/blob/1821fc16/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala -- diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala index a6b8ead..7423d80 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala @@ -26,8 +26,8 @@ import org.apache.hadoop.{io = hadoopIo} import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.util.DateTimeUtils -import org.apache.spark.sql.types import org.apache.spark.sql.types._ +import org.apache.spark.sql.{AnalysisException, types} import org.apache.spark.unsafe.types.UTF8String /* Implicit conversions */ @@ -218,6 +218,14 @@ private[hive] trait HiveInspectors { // Hive seems to return this for struct types? case c: Class[_] if c == classOf[java.lang.Object] = NullType + +// java list type unsupported +case c: Class[_] if c == classOf[java.util.List[_]] = + throw new
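The hunk above is cut off immediately after `throw new`. As a hedged sketch — the `case` pattern is confirmed by the visible diff, but the exception message and the surrounding function are paraphrased, and AnalysisException's constructor is only visible inside the org.apache.spark.sql package tree:

```
import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.types.{DataType, NullType}

// Sketch of the guard added to HiveInspectors.javaClassToDataType.
def javaClassToDataTypeSketch(c: Class[_]): DataType = c match {
  // New case: reject raw java.util.List up front, since JVM type erasure
  // hides the element type and no Catalyst DataType can be inferred for it.
  case list if list == classOf[java.util.List[_]] =>
    throw new AnalysisException(
      s"Unsupported java type ${list.getName}: the List element type is erased at runtime")
  // Pre-existing case kept for shape: Hive seems to return Object for struct types.
  case obj if obj == classOf[java.lang.Object] => NullType
  // The real method maps many more classes; everything else falls through here.
  case other =>
    throw new AnalysisException(s"Unsupported java type ${other.getName}")
}
```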
spark git commit: [SPARK-5562] [MLLIB] LDA should handle empty document.
Repository: spark Updated Branches: refs/heads/master 1821fc165 - 6718c1eb6 [SPARK-5562] [MLLIB] LDA should handle empty document. See the jira https://issues.apache.org/jira/browse/SPARK-5562 Author: Alok Singh singhal@Aloks-MacBook-Pro.local Author: Alok Singh sing...@aloks-mbp.usca.ibm.com Author: Alok Singh sing...@us.ibm.com Closes #7064 from aloknsingh/aloknsingh_SPARK-5562 and squashes the following commits: 259a0a7 [Alok Singh] change as per the comments by @jkbradley be48491 [Alok Singh] [SPARK-5562][MLlib] re-order import in alphabetical order c01311b [Alok Singh] [SPARK-5562][MLlib] fix the newline typo b271c8a [Alok Singh] [SPARK-5562][Mllib] As per github discussion with jkbradley. We would like to simplify things. 7c06251 [Alok Singh] [SPARK-5562][MLlib] modified the JavaLDASuite for test passing c710cb6 [Alok Singh] fix the scala code style to have space after : 2572a08 [Alok Singh] [SPARK-5562][MLlib] change the import xyz._ to the import xyz.{c1, c2} .. ab55fbf [Alok Singh] [SPARK-5562][MLlib] Change as per Sean Owen's comments https://github.com/apache/spark/pull/7064/files#diff-9236d23975e6f5a5608ffc81dfd79146 9f4f9ea [Alok Singh] [SPARK-5562][MLlib] LDA should handle empty document. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/6718c1eb Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/6718c1eb Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/6718c1eb Branch: refs/heads/master Commit: 6718c1eb671faaf5c1d865ad5d01dbf78dae9cd2 Parents: 1821fc1 Author: Alok Singh singhal@Aloks-MacBook-Pro.local Authored: Mon Jul 6 21:53:55 2015 -0700 Committer: Joseph K. Bradley jos...@databricks.com Committed: Mon Jul 6 21:53:55 2015 -0700 -- docs/mllib-clustering.md | 2 +- .../apache/spark/mllib/clustering/JavaLDASuite.java| 13 +++-- .../org/apache/spark/mllib/clustering/LDASuite.scala | 13 +++-- 3 files changed, 23 insertions(+), 5 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/6718c1eb/docs/mllib-clustering.md -- diff --git a/docs/mllib-clustering.md b/docs/mllib-clustering.md index 3aad414..d72dc20 100644 --- a/docs/mllib-clustering.md +++ b/docs/mllib-clustering.md @@ -447,7 +447,7 @@ It supports different inference algorithms via `setOptimizer` function. EMLDAOpt on the likelihood function and yields comprehensive results, while OnlineLDAOptimizer uses iterative mini-batch sampling for [online variational inference](https://www.cs.princeton.edu/~blei/papers/HoffmanBleiBach2010b.pdf) and is generally memory friendly. After fitting on the documents, LDA provides: * Topics: Inferred topics, each of which is a probability distribution over terms (words). -* Topic distributions for documents: For each document in the training set, LDA gives a probability distribution over topics. (EM only) +* Topic distributions for documents: For each non empty document in the training set, LDA gives a probability distribution over topics. (EM only). Note that for empty documents, we don't create the topic distributions.
(EM only) LDA takes the following parameters: http://git-wip-us.apache.org/repos/asf/spark/blob/6718c1eb/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaLDASuite.java -- diff --git a/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaLDASuite.java b/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaLDASuite.java index 581c033..b48f190 100644 --- a/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaLDASuite.java +++ b/mllib/src/test/java/org/apache/spark/mllib/clustering/JavaLDASuite.java @@ -28,12 +28,13 @@ import static org.junit.Assert.assertArrayEquals; import org.junit.Before; import org.junit.Test; +import org.apache.spark.api.java.function.Function; import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.mllib.linalg.Matrix; import org.apache.spark.mllib.linalg.Vector; - +import org.apache.spark.mllib.linalg.Vectors; public class JavaLDASuite implements Serializable { private transient JavaSparkContext sc; @@ -110,7 +111,15 @@ public class JavaLDASuite implements Serializable { // Check: topic distributions JavaPairRDDLong, Vector topicDistributions = model.javaTopicDistributions(); -assertEquals(topicDistributions.count(), corpus.count()); +// SPARK-5562. since the topicDistribution returns the distribution of the non empty docs +// over topics. Compare it against nonEmptyCorpus instead of corpus +JavaPairRDDLong, Vector nonEmptyCorpus
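The JavaLDASuite hunk above breaks off at the `nonEmptyCorpus` declaration. A minimal Scala sketch of the assertion it is setting up — the helper name and the norm-based emptiness test are assumptions from the visible context, not the verbatim test code:

```
import org.apache.spark.mllib.linalg.{Vector, Vectors}
import org.apache.spark.rdd.RDD

// Per SPARK-5562, topicDistributions now covers only non-empty documents, so
// its count is compared against the non-empty subset of the corpus rather
// than the whole corpus.
def checkTopicDistributionCount(
    corpus: RDD[(Long, Vector)],
    topicDistributions: RDD[(Long, Vector)]): Unit = {
  val nonEmptyCorpus = corpus.filter { case (_, doc) => Vectors.norm(doc, 1.0) != 0.0 }
  assert(topicDistributions.count() == nonEmptyCorpus.count())
}
```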
spark git commit: [HOTFIX] Rename release-profile to release
Repository: spark Updated Branches: refs/heads/master c46aaf47f - 1cb2629f1 [HOTFIX] Rename release-profile to release when publishing releases. We named it 'release-profile' because that is the Maven convention. However, it turns out this special name causes several other undesirable things to kick in when we are creating releases. For instance, it triggers the javadoc plugin to run, which actually fails in our current build set-up. The fix is simply to rename the profile so that its use has no collateral damage. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1cb2629f Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1cb2629f Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1cb2629f Branch: refs/heads/master Commit: 1cb2629f1aa466f92246828c562ea6f35c89ab87 Parents: c46aaf4 Author: Patrick Wendell patr...@databricks.com Authored: Mon Jul 6 22:14:24 2015 -0700 Committer: Patrick Wendell patr...@databricks.com Committed: Mon Jul 6 22:17:30 2015 -0700 -- dev/create-release/create-release.sh | 4 ++-- pom.xml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/1cb2629f/dev/create-release/create-release.sh -- diff --git a/dev/create-release/create-release.sh b/dev/create-release/create-release.sh index cfe2cd4..30190dc 100755 --- a/dev/create-release/create-release.sh +++ b/dev/create-release/create-release.sh @@ -118,13 +118,13 @@ if [[ ! $@ =~ --skip-publish ]]; then rm -rf $SPARK_REPO - build/mvn -DskipTests -Pyarn -Phive -Prelease-profile\ + build/mvn -DskipTests -Pyarn -Phive -Prelease\ -Phive-thriftserver -Phadoop-2.2 -Pspark-ganglia-lgpl -Pkinesis-asl \ clean install ./dev/change-version-to-2.11.sh - build/mvn -DskipTests -Pyarn -Phive -Prelease-profile\ + build/mvn -DskipTests -Pyarn -Phive -Prelease\ -Dscala-2.11 -Phadoop-2.2 -Pspark-ganglia-lgpl -Pkinesis-asl \ clean install http://git-wip-us.apache.org/repos/asf/spark/blob/1cb2629f/pom.xml -- diff --git a/pom.xml b/pom.xml index fbcc915..27dd4f7 100644 --- a/pom.xml +++ b/pom.xml @@ -1835,7 +1835,7 @@ Use this profile only for making Spark releases. Note that due to SPARK-8819, you must use maven version 3.2.x or before to avoid running into MSHADE-148. -- - idrelease-profile/id + idrelease/id properties !-- The maven shade plugin has a bug where enabling the `createDependencyReducedPom`
spark git commit: [HOTFIX] Rename release-profile to release
Repository: spark Updated Branches: refs/heads/branch-1.4 5c080c2ee - 397bafd7d [HOTFIX] Rename release-profile to release when publishing releases. We named it 'release-profile' because that is the Maven convention. However, it turns out this special name causes several other undesirable things to kick in when we are creating releases. For instance, it triggers the javadoc plugin to run, which actually fails in our current build set-up. The fix is simply to rename the profile so that its use has no collateral damage. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/397bafd7 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/397bafd7 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/397bafd7 Branch: refs/heads/branch-1.4 Commit: 397bafd7db7a60a7c3b0f3dd59d10cd7261f1fba Parents: 5c080c2 Author: Patrick Wendell patr...@databricks.com Authored: Mon Jul 6 22:14:24 2015 -0700 Committer: Patrick Wendell patr...@databricks.com Committed: Mon Jul 6 22:17:42 2015 -0700 -- dev/create-release/create-release.sh | 4 ++-- pom.xml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/397bafd7/dev/create-release/create-release.sh -- diff --git a/dev/create-release/create-release.sh b/dev/create-release/create-release.sh index cfe2cd4..30190dc 100755 --- a/dev/create-release/create-release.sh +++ b/dev/create-release/create-release.sh @@ -118,13 +118,13 @@ if [[ ! $@ =~ --skip-publish ]]; then rm -rf $SPARK_REPO - build/mvn -DskipTests -Pyarn -Phive -Prelease-profile\ + build/mvn -DskipTests -Pyarn -Phive -Prelease\ -Phive-thriftserver -Phadoop-2.2 -Pspark-ganglia-lgpl -Pkinesis-asl \ clean install ./dev/change-version-to-2.11.sh - build/mvn -DskipTests -Pyarn -Phive -Prelease-profile\ + build/mvn -DskipTests -Pyarn -Phive -Prelease\ -Dscala-2.11 -Phadoop-2.2 -Pspark-ganglia-lgpl -Pkinesis-asl \ clean install http://git-wip-us.apache.org/repos/asf/spark/blob/397bafd7/pom.xml -- diff --git a/pom.xml b/pom.xml index e1b3e87..9953ad0 100644 --- a/pom.xml +++ b/pom.xml @@ -1821,7 +1821,7 @@ Use this profile only for making Spark releases. Note that due to SPARK-8819, you must use maven version 3.2.x or before to avoid running into MSHADE-148. -- - idrelease-profile/id + idrelease/id properties !-- The maven shade plugin has a bug where enabling the `createDependencyReducedPom`
Git Push Summary
Repository: spark Updated Tags: refs/tags/v1.4.1-rc3 [deleted] f8aab7a7b
[2/2] spark git commit: Preparing development version 1.4.2-SNAPSHOT
Preparing development version 1.4.2-SNAPSHOT Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/bf8b47d1 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/bf8b47d1 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/bf8b47d1 Branch: refs/heads/branch-1.4 Commit: bf8b47d17b0ee2aa58a252cf6c2ddd7967334959 Parents: 3e8ae38 Author: Patrick Wendell pwend...@gmail.com Authored: Mon Jul 6 22:20:25 2015 -0700 Committer: Patrick Wendell pwend...@gmail.com Committed: Mon Jul 6 22:20:25 2015 -0700 -- assembly/pom.xml | 2 +- bagel/pom.xml | 2 +- core/pom.xml | 2 +- examples/pom.xml | 2 +- external/flume-sink/pom.xml | 2 +- external/flume/pom.xml| 2 +- external/kafka-assembly/pom.xml | 2 +- external/kafka/pom.xml| 2 +- external/mqtt/pom.xml | 2 +- external/twitter/pom.xml | 2 +- external/zeromq/pom.xml | 2 +- extras/java8-tests/pom.xml| 2 +- extras/kinesis-asl/pom.xml| 2 +- extras/spark-ganglia-lgpl/pom.xml | 2 +- graphx/pom.xml| 2 +- launcher/pom.xml | 2 +- mllib/pom.xml | 2 +- network/common/pom.xml| 2 +- network/shuffle/pom.xml | 2 +- network/yarn/pom.xml | 2 +- pom.xml | 2 +- repl/pom.xml | 2 +- sql/catalyst/pom.xml | 2 +- sql/core/pom.xml | 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml | 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- unsafe/pom.xml| 2 +- yarn/pom.xml | 2 +- 30 files changed, 30 insertions(+), 30 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/bf8b47d1/assembly/pom.xml -- diff --git a/assembly/pom.xml b/assembly/pom.xml index ba233e7..228db59 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ parent groupIdorg.apache.spark/groupId artifactIdspark-parent_2.10/artifactId -version1.4.1/version +version1.4.2-SNAPSHOT/version relativePath../pom.xml/relativePath /parent http://git-wip-us.apache.org/repos/asf/spark/blob/bf8b47d1/bagel/pom.xml -- diff --git a/bagel/pom.xml b/bagel/pom.xml index c5e9183..ce791a6 100644 --- a/bagel/pom.xml +++ b/bagel/pom.xml @@ -21,7 +21,7 @@ parent groupIdorg.apache.spark/groupId artifactIdspark-parent_2.10/artifactId -version1.4.1/version +version1.4.2-SNAPSHOT/version relativePath../pom.xml/relativePath /parent http://git-wip-us.apache.org/repos/asf/spark/blob/bf8b47d1/core/pom.xml -- diff --git a/core/pom.xml b/core/pom.xml index f0d236d..176ea9b 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -21,7 +21,7 @@ parent groupIdorg.apache.spark/groupId artifactIdspark-parent_2.10/artifactId -version1.4.1/version +version1.4.2-SNAPSHOT/version relativePath../pom.xml/relativePath /parent http://git-wip-us.apache.org/repos/asf/spark/blob/bf8b47d1/examples/pom.xml -- diff --git a/examples/pom.xml b/examples/pom.xml index e9a9cc2..877c2fb 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -21,7 +21,7 @@ parent groupIdorg.apache.spark/groupId artifactIdspark-parent_2.10/artifactId -version1.4.1/version +version1.4.2-SNAPSHOT/version relativePath../pom.xml/relativePath /parent http://git-wip-us.apache.org/repos/asf/spark/blob/bf8b47d1/external/flume-sink/pom.xml -- diff --git a/external/flume-sink/pom.xml b/external/flume-sink/pom.xml index 7eae7a7..ad431fa 100644 --- a/external/flume-sink/pom.xml +++ b/external/flume-sink/pom.xml @@ -21,7 +21,7 @@ parent groupIdorg.apache.spark/groupId artifactIdspark-parent_2.10/artifactId -version1.4.1/version +version1.4.2-SNAPSHOT/version relativePath../../pom.xml/relativePath /parent http://git-wip-us.apache.org/repos/asf/spark/blob/bf8b47d1/external/flume/pom.xml -- diff --git 
a/external/flume/pom.xml b/external/flume/pom.xml index b3ad09a..9789435 100644 --- a/external/flume/pom.xml +++ b/external/flume/pom.xml @@ -21,7 +21,7 @@ parent groupIdorg.apache.spark/groupId artifactIdspark-parent_2.10/artifactId -version1.4.1/version +
Git Push Summary
Repository: spark Updated Tags: refs/tags/v1.4.1-rc3 [created] 3e8ae3894
[1/2] spark git commit: Preparing Spark release v1.4.1-rc3
Repository: spark Updated Branches: refs/heads/branch-1.4 397bafd7d - bf8b47d17 Preparing Spark release v1.4.1-rc3 Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3e8ae389 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3e8ae389 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3e8ae389 Branch: refs/heads/branch-1.4 Commit: 3e8ae38944f13895daf328555c1ad22cd590b089 Parents: 397bafd Author: Patrick Wendell pwend...@gmail.com Authored: Mon Jul 6 22:20:19 2015 -0700 Committer: Patrick Wendell pwend...@gmail.com Committed: Mon Jul 6 22:20:19 2015 -0700 -- assembly/pom.xml | 2 +- bagel/pom.xml | 2 +- core/pom.xml | 2 +- examples/pom.xml | 2 +- external/flume-sink/pom.xml | 2 +- external/flume/pom.xml| 2 +- external/kafka-assembly/pom.xml | 2 +- external/kafka/pom.xml| 2 +- external/mqtt/pom.xml | 2 +- external/twitter/pom.xml | 2 +- external/zeromq/pom.xml | 2 +- extras/java8-tests/pom.xml| 2 +- extras/kinesis-asl/pom.xml| 2 +- extras/spark-ganglia-lgpl/pom.xml | 2 +- graphx/pom.xml| 2 +- launcher/pom.xml | 2 +- mllib/pom.xml | 2 +- network/common/pom.xml| 2 +- network/shuffle/pom.xml | 2 +- network/yarn/pom.xml | 2 +- pom.xml | 2 +- repl/pom.xml | 2 +- sql/catalyst/pom.xml | 2 +- sql/core/pom.xml | 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml | 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- unsafe/pom.xml| 2 +- yarn/pom.xml | 2 +- 30 files changed, 30 insertions(+), 30 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/3e8ae389/assembly/pom.xml -- diff --git a/assembly/pom.xml b/assembly/pom.xml index 228db59..ba233e7 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ parent groupIdorg.apache.spark/groupId artifactIdspark-parent_2.10/artifactId -version1.4.2-SNAPSHOT/version +version1.4.1/version relativePath../pom.xml/relativePath /parent http://git-wip-us.apache.org/repos/asf/spark/blob/3e8ae389/bagel/pom.xml -- diff --git a/bagel/pom.xml b/bagel/pom.xml index ce791a6..c5e9183 100644 --- a/bagel/pom.xml +++ b/bagel/pom.xml @@ -21,7 +21,7 @@ parent groupIdorg.apache.spark/groupId artifactIdspark-parent_2.10/artifactId -version1.4.2-SNAPSHOT/version +version1.4.1/version relativePath../pom.xml/relativePath /parent http://git-wip-us.apache.org/repos/asf/spark/blob/3e8ae389/core/pom.xml -- diff --git a/core/pom.xml b/core/pom.xml index 176ea9b..f0d236d 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -21,7 +21,7 @@ parent groupIdorg.apache.spark/groupId artifactIdspark-parent_2.10/artifactId -version1.4.2-SNAPSHOT/version +version1.4.1/version relativePath../pom.xml/relativePath /parent http://git-wip-us.apache.org/repos/asf/spark/blob/3e8ae389/examples/pom.xml -- diff --git a/examples/pom.xml b/examples/pom.xml index 877c2fb..e9a9cc2 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -21,7 +21,7 @@ parent groupIdorg.apache.spark/groupId artifactIdspark-parent_2.10/artifactId -version1.4.2-SNAPSHOT/version +version1.4.1/version relativePath../pom.xml/relativePath /parent http://git-wip-us.apache.org/repos/asf/spark/blob/3e8ae389/external/flume-sink/pom.xml -- diff --git a/external/flume-sink/pom.xml b/external/flume-sink/pom.xml index ad431fa..7eae7a7 100644 --- a/external/flume-sink/pom.xml +++ b/external/flume-sink/pom.xml @@ -21,7 +21,7 @@ parent groupIdorg.apache.spark/groupId artifactIdspark-parent_2.10/artifactId -version1.4.2-SNAPSHOT/version +version1.4.1/version relativePath../../pom.xml/relativePath /parent 
http://git-wip-us.apache.org/repos/asf/spark/blob/3e8ae389/external/flume/pom.xml -- diff --git a/external/flume/pom.xml b/external/flume/pom.xml index 9789435..b3ad09a 100644 --- a/external/flume/pom.xml +++ b/external/flume/pom.xml @@ -21,7 +21,7 @@ parent groupIdorg.apache.spark/groupId
spark git commit: [SPARK-8831][SQL] Support AbstractDataType in TypeCollection.
Repository: spark Updated Branches: refs/heads/master 6d0411b4f - 86768b7b3 [SPARK-8831][SQL] Support AbstractDataType in TypeCollection. Otherwise it is impossible to declare an expression supporting DecimalType. Author: Reynold Xin r...@databricks.com Closes #7232 from rxin/typecollection-adt and squashes the following commits: 934d3d1 [Reynold Xin] [SPARK-8831][SQL] Support AbstractDataType in TypeCollection. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/86768b7b Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/86768b7b Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/86768b7b Branch: refs/heads/master Commit: 86768b7b3b0c2964e744bc491bc20a1d3140ce93 Parents: 6d0411b Author: Reynold Xin r...@databricks.com Authored: Sun Jul 5 23:54:25 2015 -0700 Committer: Reynold Xin r...@databricks.com Committed: Sun Jul 5 23:54:25 2015 -0700 -- .../spark/sql/catalyst/analysis/HiveTypeCoercion.scala| 2 -- .../org/apache/spark/sql/types/AbstractDataType.scala | 10 ++ .../sql/catalyst/analysis/HiveTypeCoercionSuite.scala | 6 ++ 3 files changed, 12 insertions(+), 6 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/86768b7b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala index 84acc0e..5367b7f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala @@ -708,8 +708,6 @@ object HiveTypeCoercion { case (NullType, target) = Cast(e, target.defaultConcreteType) // Implicit cast among numeric types -// If input is decimal, and we expect a decimal type, just use the input. -case (_: DecimalType, DecimalType) = e // If input is a numeric type but not decimal, and we expect a decimal type, // cast the input to unlimited precision decimal. case (_: NumericType, DecimalType) if !inType.isInstanceOf[DecimalType] = http://git-wip-us.apache.org/repos/asf/spark/blob/86768b7b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/AbstractDataType.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/AbstractDataType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/AbstractDataType.scala index ffefb0e..fb1b47e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/AbstractDataType.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/AbstractDataType.scala @@ -53,10 +53,12 @@ private[sql] abstract class AbstractDataType { * * This means that we prefer StringType over BinaryType if it is possible to cast to StringType. 
*/ -private[sql] class TypeCollection(private val types: Seq[DataType]) extends AbstractDataType { +private[sql] class TypeCollection(private val types: Seq[AbstractDataType]) + extends AbstractDataType { + require(types.nonEmpty, sTypeCollection ($types) cannot be empty) - private[sql] override def defaultConcreteType: DataType = types.head + private[sql] override def defaultConcreteType: DataType = types.head.defaultConcreteType private[sql] override def isParentOf(childCandidate: DataType): Boolean = false @@ -68,9 +70,9 @@ private[sql] class TypeCollection(private val types: Seq[DataType]) extends Abst private[sql] object TypeCollection { - def apply(types: DataType*): TypeCollection = new TypeCollection(types) + def apply(types: AbstractDataType*): TypeCollection = new TypeCollection(types) - def unapply(typ: AbstractDataType): Option[Seq[DataType]] = typ match { + def unapply(typ: AbstractDataType): Option[Seq[AbstractDataType]] = typ match { case typ: TypeCollection = Some(typ.types) case _ = None } http://git-wip-us.apache.org/repos/asf/spark/blob/86768b7b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercionSuite.scala -- diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercionSuite.scala index 67d05ab..b564266 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercionSuite.scala +++
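To illustrate what the change enables: before this commit, TypeCollection could only hold concrete DataTypes, so an input slot could not name the abstract DecimalType (meaning a decimal of any precision). A hypothetical declaration under that reading — note that TypeCollection and AbstractDataType are private[sql], so this only compiles inside the org.apache.spark.sql package tree, and the object below is not part of the patch:

```
package org.apache.spark.sql.catalyst.expressions

import org.apache.spark.sql.types._

// Hypothetical input-type declaration: a slot that accepts a double or a
// decimal of ANY precision. `DecimalType` here is the abstract companion
// object, not a concrete DecimalType instance; mixing it into a
// TypeCollection is exactly what this patch makes possible.
object AcceptsDoubleOrDecimalExample {
  val inputTypes: Seq[AbstractDataType] = Seq(TypeCollection(DoubleType, DecimalType))
}
```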