spark git commit: [SPARK-6893][ML] default pipeline parameter handling in python
Repository: spark Updated Branches: refs/heads/master 52c3439a8 - 57cd1e86d [SPARK-6893][ML] default pipeline parameter handling in python Same as #5431 but for Python. jkbradley Author: Xiangrui Meng m...@databricks.com Closes #5534 from mengxr/SPARK-6893 and squashes the following commits: d3b519b [Xiangrui Meng] address comments ebaccc6 [Xiangrui Meng] style update fce244e [Xiangrui Meng] update explainParams with test 4d6b07a [Xiangrui Meng] add tests 5294500 [Xiangrui Meng] update default param handling in python Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/57cd1e86 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/57cd1e86 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/57cd1e86 Branch: refs/heads/master Commit: 57cd1e86d1d450f85fc9e296aff498a940452113 Parents: 52c3439 Author: Xiangrui Meng m...@databricks.com Authored: Wed Apr 15 23:49:42 2015 -0700 Committer: Xiangrui Meng m...@databricks.com Committed: Wed Apr 15 23:49:42 2015 -0700 -- .../org/apache/spark/ml/Identifiable.scala | 2 +- .../org/apache/spark/ml/param/TestParams.scala | 9 +- python/pyspark/ml/classification.py | 3 +- python/pyspark/ml/feature.py| 19 +-- python/pyspark/ml/param/__init__.py | 146 --- python/pyspark/ml/param/_gen_shared_params.py | 98 - .../pyspark/ml/param/_shared_params_code_gen.py | 100 + python/pyspark/ml/param/shared.py | 106 +++--- python/pyspark/ml/pipeline.py | 6 +- python/pyspark/ml/tests.py | 52 ++- python/pyspark/ml/util.py | 4 +- python/pyspark/ml/wrapper.py| 2 +- 12 files changed, 348 insertions(+), 199 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/57cd1e86/mllib/src/main/scala/org/apache/spark/ml/Identifiable.scala -- diff --git a/mllib/src/main/scala/org/apache/spark/ml/Identifiable.scala b/mllib/src/main/scala/org/apache/spark/ml/Identifiable.scala index a500906..a1d4909 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/Identifiable.scala +++ 
b/mllib/src/main/scala/org/apache/spark/ml/Identifiable.scala @@ -25,7 +25,7 @@ import java.util.UUID private[ml] trait Identifiable extends Serializable { /** - * A unique id for the object. The default implementation concatenates the class name, -, and 8 + * A unique id for the object. The default implementation concatenates the class name, _, and 8 * random hex chars. */ private[ml] val uid: String = http://git-wip-us.apache.org/repos/asf/spark/blob/57cd1e86/mllib/src/test/scala/org/apache/spark/ml/param/TestParams.scala -- diff --git a/mllib/src/test/scala/org/apache/spark/ml/param/TestParams.scala b/mllib/src/test/scala/org/apache/spark/ml/param/TestParams.scala index 8f9ab68..641b64b 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/param/TestParams.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/param/TestParams.scala @@ -17,16 +17,13 @@ package org.apache.spark.ml.param +import org.apache.spark.ml.param.shared.{HasInputCol, HasMaxIter} + /** A subclass of Params for testing. 
*/ -class TestParams extends Params { +class TestParams extends Params with HasMaxIter with HasInputCol { - val maxIter = new IntParam(this, maxIter, max number of iterations) def setMaxIter(value: Int): this.type = { set(maxIter, value); this } - def getMaxIter: Int = getOrDefault(maxIter) - - val inputCol = new Param[String](this, inputCol, input column name) def setInputCol(value: String): this.type = { set(inputCol, value); this } - def getInputCol: String = getOrDefault(inputCol) setDefault(maxIter - 10) http://git-wip-us.apache.org/repos/asf/spark/blob/57cd1e86/python/pyspark/ml/classification.py -- diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py index 7f42de5..d7bc09f 100644 --- a/python/pyspark/ml/classification.py +++ b/python/pyspark/ml/classification.py @@ -59,6 +59,7 @@ class LogisticRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredicti maxIter=100, regParam=0.1) super(LogisticRegression, self).__init__() +self._setDefault(maxIter=100, regParam=0.1) kwargs = self.__init__._input_kwargs self.setParams(**kwargs) @@ -71,7 +72,7 @@ class LogisticRegression(JavaEstimator, HasFeaturesCol, HasLabelCol, HasPredicti Sets params for logistic regression. kwargs = self.setParams._input_kwargs -
spark git commit: SPARK-4783 [CORE] System.exit() calls in SparkContext disrupt applications embedding Spark
Repository: spark Updated Branches: refs/heads/master 837055059 - 6179a9483 SPARK-4783 [CORE] System.exit() calls in SparkContext disrupt applications embedding Spark Avoid `System.exit(1)` in `TaskSchedulerImpl` and convert to `SparkException`; ensure scheduler calls `sc.stop()` even when this exception is thrown. CC mateiz aarondav as those who may have last touched this code. Author: Sean Owen so...@cloudera.com Closes #5492 from srowen/SPARK-4783 and squashes the following commits: 60dc682 [Sean Owen] Avoid System.exit(1) in TaskSchedulerImpl and convert to SparkException; ensure scheduler calls sc.stop() even when this exception is thrown Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/6179a948 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/6179a948 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/6179a948 Branch: refs/heads/master Commit: 6179a948371897cecb7322ebda366c2de8ecaedd Parents: 8370550 Author: Sean Owen so...@cloudera.com Authored: Thu Apr 16 10:45:32 2015 +0100 Committer: Sean Owen so...@cloudera.com Committed: Thu Apr 16 10:45:32 2015 +0100 -- .../org/apache/spark/scheduler/TaskSchedulerImpl.scala | 5 ++--- .../scheduler/cluster/SparkDeploySchedulerBackend.scala | 9 ++--- 2 files changed, 8 insertions(+), 6 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/6179a948/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala -- diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala index 2362cc7..ecc8bf1 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala @@ -394,7 +394,7 @@ private[spark] class TaskSchedulerImpl( def error(message: String) { synchronized { - if (activeTaskSets.size 0) { + if (activeTaskSets.nonEmpty) { // Have 
each task set throw a SparkException with the error for ((taskSetId, manager) - activeTaskSets) { try { @@ -407,8 +407,7 @@ private[spark] class TaskSchedulerImpl( // No task sets are active but we still got an error. Just exit since this // must mean the error is during registration. // It might be good to do something smarter here in the future. -logError(Exiting due to error from cluster scheduler: + message) -System.exit(1) +throw new SparkException(sExiting due to error from cluster scheduler: $message) } } } http://git-wip-us.apache.org/repos/asf/spark/blob/6179a948/core/src/main/scala/org/apache/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala -- diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala index ed5b7c1..ccf1dc5 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala @@ -118,9 +118,12 @@ private[spark] class SparkDeploySchedulerBackend( notifyContext() if (!stopping) { logError(Application has been killed. Reason: + reason) - scheduler.error(reason) - // Ensure the application terminates, as we can no longer run jobs. - sc.stop() + try { +scheduler.error(reason) + } finally { +// Ensure the application terminates, as we can no longer run jobs. +sc.stop() + } } } - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-4194] [core] Make SparkContext initialization exception-safe.
Repository: spark Updated Branches: refs/heads/master 6179a9483 - de4fa6b6d [SPARK-4194] [core] Make SparkContext initialization exception-safe. SparkContext has a very long constructor, where multiple things are initialized, multiple threads are spawned, and multiple opportunities for exceptions to be thrown exist. If one of these happens at an inopportune time, lots of garbage tends to stick around. This patch re-organizes SparkContext so that its internal state is initialized in a big try block. The fields keeping state are now completely private to SparkContext, and are vars, because Scala doesn't allow you to initialize a val later. The existing API interface is kept by turning vals into defs (which works because Scala guarantees the same binary interface for those). On top of that, a few things in other areas were changed to avoid more things leaking: - Executor was changed to explicitly wait for the heartbeat thread to stop. LocalBackend was changed to wait for the StopExecutor message to be received, since otherwise there could be a race between that message arriving and the actor system being shut down. - ConnectionManager could possibly hang during shutdown, because an interrupt at the wrong moment could cause the selector thread to still call select and then wait forever. So also wake up the selector so that this situation is avoided. Author: Marcelo Vanzin van...@cloudera.com Closes #5335 from vanzin/SPARK-4194 and squashes the following commits: 746b661 [Marcelo Vanzin] Fix borked merge. 80fc00e [Marcelo Vanzin] Merge branch 'master' into SPARK-4194 408dada [Marcelo Vanzin] Merge branch 'master' into SPARK-4194 2621609 [Marcelo Vanzin] Merge branch 'master' into SPARK-4194 6b73fcb [Marcelo Vanzin] Scalastyle. c671c46 [Marcelo Vanzin] Fix merge. 3979aad [Marcelo Vanzin] Merge branch 'master' into SPARK-4194 8caa8b3 [Marcelo Vanzin] [SPARK-4194] [core] Make SparkContext initialization exception-safe. 071f16e [Marcelo Vanzin] Nits. 
27456b9 [Marcelo Vanzin] More exception safety. a0b0881 [Marcelo Vanzin] Stop alloc manager before scheduler. 5545d83 [Marcelo Vanzin] [SPARK-6650] [core] Stop ExecutorAllocationManager when context stops. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/de4fa6b6 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/de4fa6b6 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/de4fa6b6 Branch: refs/heads/master Commit: de4fa6b6d12e2bee0307ffba2abfca0c33f15e45 Parents: 6179a94 Author: Marcelo Vanzin van...@cloudera.com Authored: Thu Apr 16 10:48:31 2015 +0100 Committer: Sean Owen so...@cloudera.com Committed: Thu Apr 16 10:48:31 2015 +0100 -- .../scala/org/apache/spark/SparkContext.scala | 505 +++ .../org/apache/spark/executor/Executor.scala| 33 +- .../spark/network/nio/ConnectionManager.scala | 7 +- .../spark/scheduler/TaskSchedulerImpl.scala | 3 +- .../spark/scheduler/local/LocalBackend.scala| 19 +- .../spark/ExecutorAllocationManagerSuite.scala | 6 - 6 files changed, 329 insertions(+), 244 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/de4fa6b6/core/src/main/scala/org/apache/spark/SparkContext.scala -- diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala index 3f1a7dd..e106c5c 100644 --- a/core/src/main/scala/org/apache/spark/SparkContext.scala +++ b/core/src/main/scala/org/apache/spark/SparkContext.scala @@ -31,6 +31,7 @@ import scala.collection.JavaConversions._ import scala.collection.generic.Growable import scala.collection.mutable.HashMap import scala.reflect.{ClassTag, classTag} +import scala.util.control.NonFatal import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path @@ -50,9 +51,10 @@ import org.apache.spark.executor.{ExecutorEndpoint, TriggerThreadDump} import org.apache.spark.input.{StreamInputFormat, PortableDataStream, WholeTextFileInputFormat, 
FixedLengthBinaryInputFormat} import org.apache.spark.io.CompressionCodec +import org.apache.spark.metrics.MetricsSystem import org.apache.spark.partial.{ApproximateEvaluator, PartialResult} import org.apache.spark.rdd._ -import org.apache.spark.rpc.RpcAddress +import org.apache.spark.rpc.{RpcAddress, RpcEndpointRef} import org.apache.spark.scheduler._ import org.apache.spark.scheduler.cluster.{CoarseGrainedSchedulerBackend, SparkDeploySchedulerBackend, SimrSchedulerBackend} @@ -192,8 +194,42 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli // log out Spark Version in Spark driver log logInfo(sRunning Spark version $SPARK_VERSION) - private[spark] val conf = config.clone() -
spark git commit: [Streaming][minor] Remove additional quote and unneeded imports
Repository: spark Updated Branches: refs/heads/master 57cd1e86d - 837055059 [Streaming][minor] Remove additional quote and unneeded imports Author: jerryshao saisai.s...@intel.com Closes #5540 from jerryshao/minor-fix and squashes the following commits: ebaa646 [jerryshao] Minor fix Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/83705505 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/83705505 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/83705505 Branch: refs/heads/master Commit: 8370550593f3549e90ace446961281dad0cd7498 Parents: 57cd1e8 Author: jerryshao saisai.s...@intel.com Authored: Thu Apr 16 10:39:02 2015 +0100 Committer: Sean Owen so...@cloudera.com Committed: Thu Apr 16 10:39:02 2015 +0100 -- .../apache/spark/examples/streaming/DirectKafkaWordCount.scala| 2 +- .../main/scala/org/apache/spark/streaming/kafka/KafkaRDD.scala| 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/83705505/examples/scala-2.10/src/main/scala/org/apache/spark/examples/streaming/DirectKafkaWordCount.scala -- diff --git a/examples/scala-2.10/src/main/scala/org/apache/spark/examples/streaming/DirectKafkaWordCount.scala b/examples/scala-2.10/src/main/scala/org/apache/spark/examples/streaming/DirectKafkaWordCount.scala index 1c8a20b..11a8cf0 100644 --- a/examples/scala-2.10/src/main/scala/org/apache/spark/examples/streaming/DirectKafkaWordCount.scala +++ b/examples/scala-2.10/src/main/scala/org/apache/spark/examples/streaming/DirectKafkaWordCount.scala @@ -41,7 +41,7 @@ object DirectKafkaWordCount { | brokers is a list of one or more Kafka brokers | topics is a list of one or more kafka topics to consume from | -.stripMargin) +.stripMargin) System.exit(1) } http://git-wip-us.apache.org/repos/asf/spark/blob/83705505/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaRDD.scala -- diff --git 
a/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaRDD.scala b/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaRDD.scala index a0b8a0c..a1b4a12 100644 --- a/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaRDD.scala +++ b/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaRDD.scala @@ -23,10 +23,9 @@ import org.apache.spark.{Logging, Partition, SparkContext, SparkException, TaskC import org.apache.spark.rdd.RDD import org.apache.spark.util.NextIterator -import java.util.Properties import kafka.api.{FetchRequestBuilder, FetchResponse} import kafka.common.{ErrorMapping, TopicAndPartition} -import kafka.consumer.{ConsumerConfig, SimpleConsumer} +import kafka.consumer.SimpleConsumer import kafka.message.{MessageAndMetadata, MessageAndOffset} import kafka.serializer.Decoder import kafka.utils.VerifiableProperties - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-6694][SQL]SparkSQL CLI must be able to specify an option --database on the command line.
Repository: spark Updated Branches: refs/heads/master de4fa6b6d - 3ae37b93a [SPARK-6694][SQL]SparkSQL CLI must be able to specify an option --database on the command line. SparkSQL CLI has an option --database as follows. But, the option --database is ignored. ``` $ spark-sql --help : CLI options: : --database databasename Specify the database to use ``` Author: Jin Adachi adachij2...@yahoo.co.jp Author: adachij adac...@nttdata.co.jp Closes #5345 from adachij2002/SPARK-6694 and squashes the following commits: 8659084 [Jin Adachi] Merge branch 'master' of https://github.com/apache/spark into SPARK-6694 0301eb9 [Jin Adachi] Merge branch 'master' of https://github.com/apache/spark into SPARK-6694 df81086 [Jin Adachi] Modify code style. 846f83e [Jin Adachi] Merge branch 'master' of https://github.com/apache/spark into SPARK-6694 dbe8c63 [Jin Adachi] Change file permission to 644. 7b58f42 [Jin Adachi] Merge branch 'master' of https://github.com/apache/spark into SPARK-6694 c581d06 [Jin Adachi] Add an option --database test db56122 [Jin Adachi] Merge branch 'SPARK-6694' of https://github.com/adachij2002/spark into SPARK-6694 ee09fa5 [adachij] Merge branch 'master' into SPARK-6694 c804c03 [adachij] SparkSQL CLI must be able to specify an option --database on the command line. 
Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3ae37b93 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3ae37b93 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3ae37b93 Branch: refs/heads/master Commit: 3ae37b93a7c299bd8b22a36248035bca5de3422f Parents: de4fa6b Author: Jin Adachi adachij2...@yahoo.co.jp Authored: Thu Apr 16 23:41:04 2015 +0800 Committer: Cheng Lian l...@databricks.com Committed: Thu Apr 16 23:41:04 2015 +0800 -- .../hive/thriftserver/SparkSQLCLIDriver.scala | 3 ++ .../spark/sql/hive/thriftserver/CliSuite.scala | 45 2 files changed, 39 insertions(+), 9 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/3ae37b93/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala -- diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala index 62c061b..85281c6 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLCLIDriver.scala @@ -145,6 +145,9 @@ private[hive] object SparkSQLCLIDriver { case e: UnsupportedEncodingException = System.exit(3) } +// use the specified database if specified +cli.processSelectDatabase(sessionState); + // Execute -i init files (always in silent mode) cli.processInitFiles(sessionState) http://git-wip-us.apache.org/repos/asf/spark/blob/3ae37b93/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala -- diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala index 6d1d7c3..b070fa8 100644 --- 
a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala @@ -25,22 +25,31 @@ import scala.concurrent.{Await, Promise} import scala.sys.process.{Process, ProcessLogger} import org.apache.hadoop.hive.conf.HiveConf.ConfVars -import org.scalatest.{BeforeAndAfterAll, FunSuite} +import org.scalatest.{BeforeAndAfter, BeforeAndAfterAll, FunSuite} import org.apache.spark.Logging import org.apache.spark.util.Utils -class CliSuite extends FunSuite with BeforeAndAfterAll with Logging { +class CliSuite extends FunSuite with BeforeAndAfter with Logging { + val warehousePath = Utils.createTempDir() + val metastorePath = Utils.createTempDir() + + before { + warehousePath.delete() + metastorePath.delete() + } + + after { + warehousePath.delete() + metastorePath.delete() + } + def runCliWithin( timeout: FiniteDuration, extraArgs: Seq[String] = Seq.empty)( - queriesAndExpectedAnswers: (String, String)*) { + queriesAndExpectedAnswers: (String, String)*): Unit = { val (queries, expectedAnswers) = queriesAndExpectedAnswers.unzip -val warehousePath = Utils.createTempDir() -warehousePath.delete() -val metastorePath = Utils.createTempDir() -
spark git commit: [SPARK-6855] [SPARKR] Set R includes to get the right collate order.
Repository: spark Updated Branches: refs/heads/master ef3fb801a - 55f553a97 [SPARK-6855] [SPARKR] Set R includes to get the right collate order. This prevents tools like devtools::document creating invalid collate orders Author: Shivaram Venkataraman shiva...@cs.berkeley.edu Closes #5462 from shivaram/collate-order and squashes the following commits: f3db562 [Shivaram Venkataraman] Set R includes to get the right collate order. This prevents tools like devtools::document creating invalid collate orders Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/55f553a9 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/55f553a9 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/55f553a9 Branch: refs/heads/master Commit: 55f553a979db925aa0c3559f7e80b99d2bf3feb4 Parents: ef3fb80 Author: Shivaram Venkataraman shiva...@cs.berkeley.edu Authored: Thu Apr 16 13:06:34 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Thu Apr 16 13:06:34 2015 -0700 -- R/pkg/DESCRIPTION | 6 +++--- R/pkg/R/DataFrame.R | 2 +- R/pkg/R/column.R| 2 +- R/pkg/R/group.R | 3 +++ R/pkg/R/jobj.R | 3 +++ R/pkg/R/pairRDD.R | 2 ++ 6 files changed, 13 insertions(+), 5 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/55f553a9/R/pkg/DESCRIPTION -- diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index 1842b97..052f68c 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -17,19 +17,19 @@ License: Apache License (== 2.0) Collate: 'generics.R' 'jobj.R' -'SQLTypes.R' 'RDD.R' 'pairRDD.R' +'SQLTypes.R' 'column.R' 'group.R' 'DataFrame.R' 'SQLContext.R' +'backend.R' 'broadcast.R' +'client.R' 'context.R' 'deserialize.R' 'serialize.R' 'sparkR.R' -'backend.R' -'client.R' 'utils.R' 'zzz.R' http://git-wip-us.apache.org/repos/asf/spark/blob/55f553a9/R/pkg/R/DataFrame.R -- diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index feafd56..044fdb4 100644 --- a/R/pkg/R/DataFrame.R +++ 
b/R/pkg/R/DataFrame.R @@ -17,7 +17,7 @@ # DataFrame.R - DataFrame class and methods implemented in S4 OO classes -#' @include jobj.R SQLTypes.R RDD.R pairRDD.R column.R group.R +#' @include generics.R jobj.R SQLTypes.R RDD.R pairRDD.R column.R group.R NULL setOldClass(jobj) http://git-wip-us.apache.org/repos/asf/spark/blob/55f553a9/R/pkg/R/column.R -- diff --git a/R/pkg/R/column.R b/R/pkg/R/column.R index e196305..b282001 100644 --- a/R/pkg/R/column.R +++ b/R/pkg/R/column.R @@ -17,7 +17,7 @@ # Column Class -#' @include generics.R jobj.R +#' @include generics.R jobj.R SQLTypes.R NULL setOldClass(jobj) http://git-wip-us.apache.org/repos/asf/spark/blob/55f553a9/R/pkg/R/group.R -- diff --git a/R/pkg/R/group.R b/R/pkg/R/group.R index 09fc0a7..855fbdf 100644 --- a/R/pkg/R/group.R +++ b/R/pkg/R/group.R @@ -17,6 +17,9 @@ # group.R - GroupedData class and methods implemented in S4 OO classes +#' @include generics.R jobj.R SQLTypes.R column.R +NULL + setOldClass(jobj) #' @title S4 class that represents a GroupedData http://git-wip-us.apache.org/repos/asf/spark/blob/55f553a9/R/pkg/R/jobj.R -- diff --git a/R/pkg/R/jobj.R b/R/pkg/R/jobj.R index 4180f14..a8a2523 100644 --- a/R/pkg/R/jobj.R +++ b/R/pkg/R/jobj.R @@ -18,6 +18,9 @@ # References to objects that exist on the JVM backend # are maintained using the jobj. +#' @include generics.R +NULL + # Maintain a reference count of Java object references # This allows us to GC the java object when it is safe .validJobjs - new.env(parent = emptyenv()) http://git-wip-us.apache.org/repos/asf/spark/blob/55f553a9/R/pkg/R/pairRDD.R -- diff --git a/R/pkg/R/pairRDD.R b/R/pkg/R/pairRDD.R index 739d399..5d64822 100644 --- a/R/pkg/R/pairRDD.R +++ b/R/pkg/R/pairRDD.R @@ -16,6 +16,8 @@ # # Operations supported on RDDs contains pairs (i.e key, value) +#' @include generics.R jobj.R RDD.R +NULL Actions and Transformations - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
Git Push Summary
Repository: spark Updated Tags: refs/tags/v1.3.0-rc3 [deleted] 4aaf48d46 - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
Git Push Summary
Repository: spark Updated Tags: refs/tags/v1.3.1-rc1 [deleted] 0dcb5d9f3 - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
Git Push Summary
Repository: spark Updated Tags: refs/tags/v1.3.1-rc3 [deleted] 3e8391327 - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
Git Push Summary
Repository: spark Updated Tags: refs/tags/v1.2.2 [created] 7531b50e4 - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
Git Push Summary
Repository: spark Updated Tags: refs/tags/v1.2.2-rc1 [deleted] 7531b50e4 - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
Git Push Summary
Repository: spark Updated Tags: refs/tags/v1.3.0-rc1 [deleted] f97b0d4a6 - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
svn commit: r8636 - /dev/spark/spark-1.2.2-rc1/ /release/spark/spark-1.2.2/
Author: pwendell Date: Thu Apr 16 22:59:24 2015 New Revision: 8636 Log: Spark 1.2.2 Release Added: release/spark/spark-1.2.2/ - copied from r8635, dev/spark/spark-1.2.2-rc1/ Removed: dev/spark/spark-1.2.2-rc1/ - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[6/6] spark git commit: [SPARK-4897] [PySpark] Python 3 support
[SPARK-4897] [PySpark] Python 3 support This PR updates PySpark to support Python 3 (tested with 3.4). Known issue: unpickling arrays from Pyrolite is broken in Python 3, so those tests are skipped. TODO: ec2/spark-ec2.py is not fully tested with python3. Author: Davies Liu dav...@databricks.com Author: twneale twne...@gmail.com Author: Josh Rosen joshro...@databricks.com Closes #5173 from davies/python3 and squashes the following commits: d7d6323 [Davies Liu] fix tests 6c52a98 [Davies Liu] fix mllib test 99e334f [Davies Liu] update timeout b716610 [Davies Liu] Merge branch 'master' of github.com:apache/spark into python3 cafd5ec [Davies Liu] adddress comments from @mengxr bf225d7 [Davies Liu] Merge branch 'master' of github.com:apache/spark into python3 179fc8d [Davies Liu] tuning flaky tests 8c8b957 [Davies Liu] fix ResourceWarning in Python 3 5c57c95 [Davies Liu] Merge branch 'master' of github.com:apache/spark into python3 4006829 [Davies Liu] fix test 2fc0066 [Davies Liu] add python3 path 71535e9 [Davies Liu] fix xrange and divide 5a55ab4 [Davies Liu] Merge branch 'master' of github.com:apache/spark into python3 125f12c [Davies Liu] Merge branch 'master' of github.com:apache/spark into python3 ed498c8 [Davies Liu] fix compatibility with python 3 820e649 [Davies Liu] Merge branch 'master' of github.com:apache/spark into python3 e8ce8c9 [Davies Liu] Merge branch 'master' of github.com:apache/spark into python3 ad7c374 [Davies Liu] fix mllib test and warning ef1fc2f [Davies Liu] fix tests 4eee14a [Davies Liu] Merge branch 'master' of github.com:apache/spark into python3 20112ff [Davies Liu] Merge branch 'master' of github.com:apache/spark into python3 59bb492 [Davies Liu] fix tests 1da268c [Davies Liu] Merge branch 'master' of github.com:apache/spark into python3 ca0fdd3 [Davies Liu] fix code style 9563a15 [Davies Liu] add imap back for python 2 0b1ec04 [Davies Liu] make python examples work with Python 3 d2fd566 [Davies Liu] Merge branch 'master' of 
github.com:apache/spark into python3 a716d34 [Davies Liu] test with python 3.4 f1700e8 [Davies Liu] fix test in python3 671b1db [Davies Liu] fix test in python3 692ff47 [Davies Liu] fix flaky test 7b9699f [Davies Liu] invalidate import cache for Python 3.3+ 9c58497 [Davies Liu] fix kill worker 309bfbf [Davies Liu] keep compatibility 5707476 [Davies Liu] cleanup, fix hash of string in 3.3+ 8662d5b [Davies Liu] Merge branch 'master' of github.com:apache/spark into python3 f53e1f0 [Davies Liu] fix tests 70b6b73 [Davies Liu] compile ec2/spark_ec2.py in python 3 a39167e [Davies Liu] support customize class in __main__ 814c77b [Davies Liu] run unittests with python 3 7f4476e [Davies Liu] mllib tests passed d737924 [Davies Liu] pass ml tests 375ea17 [Davies Liu] SQL tests pass 6cc42a9 [Davies Liu] rename 431a8de [Davies Liu] streaming tests pass 78901a7 [Davies Liu] fix hash of serializer in Python 3 24b2f2e [Davies Liu] pass all RDD tests 35f48fe [Davies Liu] run future again 1eebac2 [Davies Liu] fix conflict in ec2/spark_ec2.py 6e3c21d [Davies Liu] make cloudpickle work with Python3 2fb2db3 [Josh Rosen] Guard more changes behind sys.version; still doesn't run 1aa5e8f [twneale] Turned out `pickle.DictionaryType is dict` == True, so swapped it out 7354371 [twneale] buffer -- memoryview I'm not super sure if this a valid change, but the 2.7 docs recommend using memoryview over buffer where possible, so hoping it'll work. b69ccdf [twneale] Uses the pure python pickle._Pickler instead of c-extension _pickle.Pickler. It appears pyspark 2.7 uses the pure python pickler as well, so this shouldn't degrade pickling performance (?). 
f40d925 [twneale] xrange -- range e104215 [twneale] Replaces 2.7 types.InstsanceType with 3.4 `object`could be horribly wrong depending on how types.InstanceType is used elsewhere in the package--see http://bugs.python.org/issue8206 79de9d0 [twneale] Replaces python2.7 `file` with 3.4 _io.TextIOWrapper 2adb42d [Josh Rosen] Fix up some import differences between Python 2 and 3 854be27 [Josh Rosen] Run `futurize` on Python code: 7c5b4ce [Josh Rosen] Remove Python 3 check in shell.py. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/04e44b37 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/04e44b37 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/04e44b37 Branch: refs/heads/master Commit: 04e44b37cc04f62fbf9e08c7076349e0a4d12ea8 Parents: 55f553a Author: Davies Liu dav...@databricks.com Authored: Thu Apr 16 16:20:57 2015 -0700 Committer: Josh Rosen joshro...@databricks.com Committed: Thu Apr 16 16:20:57 2015 -0700 -- bin/pyspark |1 + bin/spark-submit|3 + bin/spark-submit2.cmd |3 + dev/run-tests |2 + dev/run-tests-jenkins
[2/6] spark git commit: [SPARK-4897] [PySpark] Python 3 support
http://git-wip-us.apache.org/repos/asf/spark/blob/04e44b37/python/pyspark/sql/types.py -- diff --git a/python/pyspark/sql/types.py b/python/pyspark/sql/types.py deleted file mode 100644 index ef76d84..000 --- a/python/pyspark/sql/types.py +++ /dev/null @@ -1,1252 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the License); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -#http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an AS IS BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import decimal -import datetime -import keyword -import warnings -import json -import re -import weakref -from array import array -from operator import itemgetter - - -__all__ = [ -DataType, NullType, StringType, BinaryType, BooleanType, DateType, -TimestampType, DecimalType, DoubleType, FloatType, ByteType, IntegerType, -LongType, ShortType, ArrayType, MapType, StructField, StructType] - - -class DataType(object): -Base class for data types. 
- -def __repr__(self): -return self.__class__.__name__ - -def __hash__(self): -return hash(str(self)) - -def __eq__(self, other): -return isinstance(other, self.__class__) and self.__dict__ == other.__dict__ - -def __ne__(self, other): -return not self.__eq__(other) - -@classmethod -def typeName(cls): -return cls.__name__[:-4].lower() - -def simpleString(self): -return self.typeName() - -def jsonValue(self): -return self.typeName() - -def json(self): -return json.dumps(self.jsonValue(), - separators=(',', ':'), - sort_keys=True) - - -# This singleton pattern does not work with pickle, you will get -# another object after pickle and unpickle -class PrimitiveTypeSingleton(type): -Metaclass for PrimitiveType - -_instances = {} - -def __call__(cls): -if cls not in cls._instances: -cls._instances[cls] = super(PrimitiveTypeSingleton, cls).__call__() -return cls._instances[cls] - - -class PrimitiveType(DataType): -Spark SQL PrimitiveType - -__metaclass__ = PrimitiveTypeSingleton - - -class NullType(PrimitiveType): -Null type. - -The data type representing None, used for the types that cannot be inferred. - - - -class StringType(PrimitiveType): -String data type. - - - -class BinaryType(PrimitiveType): -Binary (byte array) data type. - - - -class BooleanType(PrimitiveType): -Boolean data type. - - - -class DateType(PrimitiveType): -Date (datetime.date) data type. - - - -class TimestampType(PrimitiveType): -Timestamp (datetime.datetime) data type. - - - -class DecimalType(DataType): -Decimal (decimal.Decimal) data type. 
- - -def __init__(self, precision=None, scale=None): -self.precision = precision -self.scale = scale -self.hasPrecisionInfo = precision is not None - -def simpleString(self): -if self.hasPrecisionInfo: -return decimal(%d,%d) % (self.precision, self.scale) -else: -return decimal(10,0) - -def jsonValue(self): -if self.hasPrecisionInfo: -return decimal(%d,%d) % (self.precision, self.scale) -else: -return decimal - -def __repr__(self): -if self.hasPrecisionInfo: -return DecimalType(%d,%d) % (self.precision, self.scale) -else: -return DecimalType() - - -class DoubleType(PrimitiveType): -Double data type, representing double precision floats. - - - -class FloatType(PrimitiveType): -Float data type, representing single precision floats. - - - -class ByteType(PrimitiveType): -Byte data type, i.e. a signed integer in a single byte. - -def simpleString(self): -return 'tinyint' - - -class IntegerType(PrimitiveType): -Int data type, i.e. a signed 32-bit integer. - -def simpleString(self): -return 'int' - - -class LongType(PrimitiveType): -Long data type, i.e. a signed 64-bit integer. - -If the values are beyond the range of [-9223372036854775808, 9223372036854775807], -please use :class:`DecimalType`. - -def simpleString(self): -return 'bigint' - - -class ShortType(PrimitiveType): -Short data type, i.e. a signed 16-bit integer. -
spark git commit: [SPARK-6911] [SQL] improve accessor for nested types
Repository: spark Updated Branches: refs/heads/master 5fe434335 - 6183b5e2c [SPARK-6911] [SQL] improve accessor for nested types Support access columns by index in Python: ``` df[df[0] 3].collect() [Row(age=5, name=u'Bob')] ``` Access items in ArrayType or MapType ``` df.select(df.l.getItem(0), df.d.getItem(key)).show() df.select(df.l[0], df.d[key]).show() ``` Access field in StructType ``` df.select(df.r.getField(b)).show() df.select(df.r.a).show() ``` Author: Davies Liu dav...@databricks.com Closes #5513 from davies/access and squashes the following commits: e04d5a0 [Davies Liu] Update run-tests-jenkins 7ada9eb [Davies Liu] update timeout d125ac4 [Davies Liu] check column name, improve scala tests 6b62540 [Davies Liu] fix test db15b42 [Davies Liu] Merge branch 'master' of github.com:apache/spark into access 6c32e79 [Davies Liu] add scala tests 11f1df3 [Davies Liu] improve accessor for nested types Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/6183b5e2 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/6183b5e2 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/6183b5e2 Branch: refs/heads/master Commit: 6183b5e2caedd074073d0f6cb6609a634e2f5194 Parents: 5fe4343 Author: Davies Liu dav...@databricks.com Authored: Thu Apr 16 17:33:57 2015 -0700 Committer: Michael Armbrust mich...@databricks.com Committed: Thu Apr 16 17:33:57 2015 -0700 -- python/pyspark/sql/dataframe.py | 49 ++-- python/pyspark/sql/tests.py | 18 +++ .../scala/org/apache/spark/sql/Column.scala | 7 +-- .../org/apache/spark/sql/DataFrameSuite.scala | 6 +++ .../scala/org/apache/spark/sql/TestData.scala | 9 ++-- 5 files changed, 76 insertions(+), 13 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/6183b5e2/python/pyspark/sql/dataframe.py -- diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py index d76504f..b9a3e6c 100644 --- a/python/pyspark/sql/dataframe.py +++ 
b/python/pyspark/sql/dataframe.py @@ -563,16 +563,23 @@ class DataFrame(object): [Row(name=u'Alice', age=2), Row(name=u'Bob', age=5)] df[ df.age 3 ].collect() [Row(age=5, name=u'Bob')] + df[df[0] 3].collect() +[Row(age=5, name=u'Bob')] if isinstance(item, basestring): +if item not in self.columns: +raise IndexError(no such column: %s % item) jc = self._jdf.apply(item) return Column(jc) elif isinstance(item, Column): return self.filter(item) -elif isinstance(item, list): +elif isinstance(item, (list, tuple)): return self.select(*item) +elif isinstance(item, int): +jc = self._jdf.apply(self.columns[item]) +return Column(jc) else: -raise IndexError(unexpected index: %s % item) +raise TypeError(unexpected type: %s % type(item)) def __getattr__(self, name): Returns the :class:`Column` denoted by ``name``. @@ -580,8 +587,8 @@ class DataFrame(object): df.select(df.age).collect() [Row(age=2), Row(age=5)] -if name.startswith(__): -raise AttributeError(name) +if name not in self.columns: +raise AttributeError(No such column: %s % name) jc = self._jdf.apply(name) return Column(jc) @@ -1093,7 +1100,39 @@ class Column(object): # container operators __contains__ = _bin_op(contains) __getitem__ = _bin_op(getItem) -getField = _bin_op(getField, An expression that gets a field by name in a StructField.) + +def getItem(self, key): +An expression that gets an item at position `ordinal` out of a list, + or gets an item by key out of a dict. + + df = sc.parallelize([([1, 2], {key: value})]).toDF([l, d]) + df.select(df.l.getItem(0), df.d.getItem(key)).show() +l[0] d[key] +1value + df.select(df.l[0], df.d[key]).show() +l[0] d[key] +1value + +return self[key] + +def getField(self, name): +An expression that gets a field by name in a StructField. 
+ + from pyspark.sql import Row + df = sc.parallelize([Row(r=Row(a=1, b=b))]).toDF() + df.select(df.r.getField(b)).show() +r.b +b + df.select(df.r.a).show() +r.a +1 + +return Column(self._jc.getField(name)) + +def __getattr__(self, item): +if item.startswith(__): +raise AttributeError(item) +return self.getField(item) # string methods rlike = _bin_op(rlike)
spark git commit: [SQL][Minor] Fix foreachUp of treenode
Repository: spark Updated Branches: refs/heads/master 6183b5e2c - d96608674 [SQL][Minor] Fix foreachUp of treenode `foreachUp` should run the given function recursively on [[children]] and then on this node (just like transformUp). The current implementation does not follow this. This leads to CheckAnalysis not checking from the bottom of the logical tree. Author: scwf wangf...@huawei.com Author: Fei Wang wangf...@huawei.com Closes #5518 from scwf/patch-1 and squashes the following commits: 18e28b2 [scwf] added a test case 1ccbfa8 [Fei Wang] fix foreachUp Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d9660867 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d9660867 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d9660867 Branch: refs/heads/master Commit: d96608674f6c2ff3abb13c65d80c1a3872206710 Parents: 6183b5e Author: scwf wangf...@huawei.com Authored: Thu Apr 16 17:35:51 2015 -0700 Committer: Michael Armbrust mich...@databricks.com Committed: Thu Apr 16 17:35:51 2015 -0700 -- .../org/apache/spark/sql/catalyst/trees/TreeNode.scala | 2 +- .../apache/spark/sql/catalyst/trees/TreeNodeSuite.scala | 12 2 files changed, 13 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/d9660867/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala index a2df51e..97502ed 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala @@ -85,7 +85,7 @@ abstract class TreeNode[BaseType : TreeNode[BaseType]] { * @param f the function to be applied to each node in the tree. 
*/ def foreachUp(f: BaseType = Unit): Unit = { -children.foreach(_.foreach(f)) +children.foreach(_.foreachUp(f)) f(this) } http://git-wip-us.apache.org/repos/asf/spark/blob/d9660867/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/trees/TreeNodeSuite.scala -- diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/trees/TreeNodeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/trees/TreeNodeSuite.scala index 4eb8708..6b39332 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/trees/TreeNodeSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/trees/TreeNodeSuite.scala @@ -117,5 +117,17 @@ class TreeNodeSuite extends FunSuite { assert(transformed.origin.startPosition.isDefined) } + test(foreach up) { +val actual = new ArrayBuffer[String]() +val expected = Seq(1, 2, 3, 4, -, *, +) +val expression = Add(Literal(1), Multiply(Literal(2), Subtract(Literal(3), Literal(4 +expression foreachUp { + case b: BinaryExpression = actual.append(b.symbol); + case l: Literal = actual.append(l.toString); +} + +assert(expected === actual) + } + } - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
svn commit: r8638 - /dev/spark/spark-1.3.1-rc3/ /release/spark/spark-1.3.1/
Author: pwendell Date: Thu Apr 16 23:43:24 2015 New Revision: 8638 Log: Spark 1.3.1 Added: release/spark/spark-1.3.1/ - copied from r8637, dev/spark/spark-1.3.1-rc3/ Removed: dev/spark/spark-1.3.1-rc3/ - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[3/6] spark git commit: [SPARK-4897] [PySpark] Python 3 support
http://git-wip-us.apache.org/repos/asf/spark/blob/04e44b37/python/pyspark/sql/_types.py -- diff --git a/python/pyspark/sql/_types.py b/python/pyspark/sql/_types.py new file mode 100644 index 000..492c0cb --- /dev/null +++ b/python/pyspark/sql/_types.py @@ -0,0 +1,1261 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the License); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +#http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an AS IS BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import sys +import decimal +import datetime +import keyword +import warnings +import json +import re +import weakref +from array import array +from operator import itemgetter + +if sys.version = 3: +long = int +unicode = str + +__all__ = [ +DataType, NullType, StringType, BinaryType, BooleanType, DateType, +TimestampType, DecimalType, DoubleType, FloatType, ByteType, IntegerType, +LongType, ShortType, ArrayType, MapType, StructField, StructType] + + +class DataType(object): +Base class for data types. 
+ +def __repr__(self): +return self.__class__.__name__ + +def __hash__(self): +return hash(str(self)) + +def __eq__(self, other): +return isinstance(other, self.__class__) and self.__dict__ == other.__dict__ + +def __ne__(self, other): +return not self.__eq__(other) + +@classmethod +def typeName(cls): +return cls.__name__[:-4].lower() + +def simpleString(self): +return self.typeName() + +def jsonValue(self): +return self.typeName() + +def json(self): +return json.dumps(self.jsonValue(), + separators=(',', ':'), + sort_keys=True) + + +# This singleton pattern does not work with pickle, you will get +# another object after pickle and unpickle +class PrimitiveTypeSingleton(type): +Metaclass for PrimitiveType + +_instances = {} + +def __call__(cls): +if cls not in cls._instances: +cls._instances[cls] = super(PrimitiveTypeSingleton, cls).__call__() +return cls._instances[cls] + + +class PrimitiveType(DataType): +Spark SQL PrimitiveType + +__metaclass__ = PrimitiveTypeSingleton + + +class NullType(PrimitiveType): +Null type. + +The data type representing None, used for the types that cannot be inferred. + + + +class StringType(PrimitiveType): +String data type. + + + +class BinaryType(PrimitiveType): +Binary (byte array) data type. + + + +class BooleanType(PrimitiveType): +Boolean data type. + + + +class DateType(PrimitiveType): +Date (datetime.date) data type. + + + +class TimestampType(PrimitiveType): +Timestamp (datetime.datetime) data type. + + + +class DecimalType(DataType): +Decimal (decimal.Decimal) data type. 
+ + +def __init__(self, precision=None, scale=None): +self.precision = precision +self.scale = scale +self.hasPrecisionInfo = precision is not None + +def simpleString(self): +if self.hasPrecisionInfo: +return decimal(%d,%d) % (self.precision, self.scale) +else: +return decimal(10,0) + +def jsonValue(self): +if self.hasPrecisionInfo: +return decimal(%d,%d) % (self.precision, self.scale) +else: +return decimal + +def __repr__(self): +if self.hasPrecisionInfo: +return DecimalType(%d,%d) % (self.precision, self.scale) +else: +return DecimalType() + + +class DoubleType(PrimitiveType): +Double data type, representing double precision floats. + + + +class FloatType(PrimitiveType): +Float data type, representing single precision floats. + + + +class ByteType(PrimitiveType): +Byte data type, i.e. a signed integer in a single byte. + +def simpleString(self): +return 'tinyint' + + +class IntegerType(PrimitiveType): +Int data type, i.e. a signed 32-bit integer. + +def simpleString(self): +return 'int' + + +class LongType(PrimitiveType): +Long data type, i.e. a signed 64-bit integer. + +If the values are beyond the range of [-9223372036854775808, 9223372036854775807], +please use :class:`DecimalType`. + +def simpleString(self): +return 'bigint' + + +class
svn commit: r8637 - /dev/spark/spark-1.3.1-rc3/
Author: pwendell Date: Thu Apr 16 23:41:53 2015 New Revision: 8637 Log: Spark 1.3.1 RC3 Added: dev/spark/spark-1.3.1-rc3/ dev/spark/spark-1.3.1-rc3/spark-1.3.1-bin-cdh4.tgz (with props) dev/spark/spark-1.3.1-rc3/spark-1.3.1-bin-cdh4.tgz.asc (with props) dev/spark/spark-1.3.1-rc3/spark-1.3.1-bin-cdh4.tgz.md5 dev/spark/spark-1.3.1-rc3/spark-1.3.1-bin-cdh4.tgz.sha dev/spark/spark-1.3.1-rc3/spark-1.3.1-bin-hadoop1-scala2.11.tgz (with props) dev/spark/spark-1.3.1-rc3/spark-1.3.1-bin-hadoop1-scala2.11.tgz.asc (with props) dev/spark/spark-1.3.1-rc3/spark-1.3.1-bin-hadoop1-scala2.11.tgz.md5 dev/spark/spark-1.3.1-rc3/spark-1.3.1-bin-hadoop1-scala2.11.tgz.sha dev/spark/spark-1.3.1-rc3/spark-1.3.1-bin-hadoop1.tgz (with props) dev/spark/spark-1.3.1-rc3/spark-1.3.1-bin-hadoop1.tgz.asc (with props) dev/spark/spark-1.3.1-rc3/spark-1.3.1-bin-hadoop1.tgz.md5 dev/spark/spark-1.3.1-rc3/spark-1.3.1-bin-hadoop1.tgz.sha dev/spark/spark-1.3.1-rc3/spark-1.3.1-bin-hadoop2.3.tgz (with props) dev/spark/spark-1.3.1-rc3/spark-1.3.1-bin-hadoop2.3.tgz.asc (with props) dev/spark/spark-1.3.1-rc3/spark-1.3.1-bin-hadoop2.3.tgz.md5 dev/spark/spark-1.3.1-rc3/spark-1.3.1-bin-hadoop2.3.tgz.sha dev/spark/spark-1.3.1-rc3/spark-1.3.1-bin-hadoop2.4.tgz (with props) dev/spark/spark-1.3.1-rc3/spark-1.3.1-bin-hadoop2.4.tgz.asc (with props) dev/spark/spark-1.3.1-rc3/spark-1.3.1-bin-hadoop2.4.tgz.md5 dev/spark/spark-1.3.1-rc3/spark-1.3.1-bin-hadoop2.4.tgz.sha dev/spark/spark-1.3.1-rc3/spark-1.3.1-bin-hadoop2.6.tgz (with props) dev/spark/spark-1.3.1-rc3/spark-1.3.1-bin-hadoop2.6.tgz.asc (with props) dev/spark/spark-1.3.1-rc3/spark-1.3.1-bin-hadoop2.6.tgz.md5 dev/spark/spark-1.3.1-rc3/spark-1.3.1-bin-hadoop2.6.tgz.sha dev/spark/spark-1.3.1-rc3/spark-1.3.1.tgz (with props) dev/spark/spark-1.3.1-rc3/spark-1.3.1.tgz.asc (with props) dev/spark/spark-1.3.1-rc3/spark-1.3.1.tgz.md5 dev/spark/spark-1.3.1-rc3/spark-1.3.1.tgz.sha Added: dev/spark/spark-1.3.1-rc3/spark-1.3.1-bin-cdh4.tgz == Binary file - no diff available. 
Propchange: dev/spark/spark-1.3.1-rc3/spark-1.3.1-bin-cdh4.tgz -- svn:mime-type = application/x-gzip Added: dev/spark/spark-1.3.1-rc3/spark-1.3.1-bin-cdh4.tgz.asc == Binary file - no diff available. Propchange: dev/spark/spark-1.3.1-rc3/spark-1.3.1-bin-cdh4.tgz.asc -- svn:mime-type = application/pgp-signature Added: dev/spark/spark-1.3.1-rc3/spark-1.3.1-bin-cdh4.tgz.md5 == --- dev/spark/spark-1.3.1-rc3/spark-1.3.1-bin-cdh4.tgz.md5 (added) +++ dev/spark/spark-1.3.1-rc3/spark-1.3.1-bin-cdh4.tgz.md5 Thu Apr 16 23:41:53 2015 @@ -0,0 +1 @@ +spark-1.3.1-bin-cdh4.tgz: 2B 83 92 AA AB 66 FF 1A 78 EC FD 58 7E AF 87 64 Added: dev/spark/spark-1.3.1-rc3/spark-1.3.1-bin-cdh4.tgz.sha == --- dev/spark/spark-1.3.1-rc3/spark-1.3.1-bin-cdh4.tgz.sha (added) +++ dev/spark/spark-1.3.1-rc3/spark-1.3.1-bin-cdh4.tgz.sha Thu Apr 16 23:41:53 2015 @@ -0,0 +1,3 @@ +spark-1.3.1-bin-cdh4.tgz: 0C9F7857 E426F5AA 5F1AC5FB CA6DE120 6B250100 BC729749 + 3D06DEA8 E77366DB 17F2138E B30618A6 0D19CDC7 3C835028 + 13077F23 C95980A3 3B986EF2 221AC851 Added: dev/spark/spark-1.3.1-rc3/spark-1.3.1-bin-hadoop1-scala2.11.tgz == Binary file - no diff available. Propchange: dev/spark/spark-1.3.1-rc3/spark-1.3.1-bin-hadoop1-scala2.11.tgz -- svn:mime-type = application/x-gzip Added: dev/spark/spark-1.3.1-rc3/spark-1.3.1-bin-hadoop1-scala2.11.tgz.asc == Binary file - no diff available. Propchange: dev/spark/spark-1.3.1-rc3/spark-1.3.1-bin-hadoop1-scala2.11.tgz.asc -- svn:mime-type = application/pgp-signature Added: dev/spark/spark-1.3.1-rc3/spark-1.3.1-bin-hadoop1-scala2.11.tgz.md5 == --- dev/spark/spark-1.3.1-rc3/spark-1.3.1-bin-hadoop1-scala2.11.tgz.md5 (added) +++ dev/spark/spark-1.3.1-rc3/spark-1.3.1-bin-hadoop1-scala2.11.tgz.md5 Thu Apr 16 23:41:53 2015 @@ -0,0 +1,2 @@ +spark-1.3.1-bin-hadoop1-scala2.11.tgz: 7F B9 15 00 43 42 82 92 FD C7 13 69 F6 + B7 9D B6 Added:
spark git commit: [SPARK-6972][SQL] Add Coalesce to DataFrame
Repository: spark Updated Branches: refs/heads/master e5949c287 - 8220d5265 [SPARK-6972][SQL] Add Coalesce to DataFrame Author: Michael Armbrust mich...@databricks.com Closes #5545 from marmbrus/addCoalesce and squashes the following commits: 9fdf3f6 [Michael Armbrust] [SPARK-6972][SQL] Add Coalesce to DataFrame Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/8220d526 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/8220d526 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/8220d526 Branch: refs/heads/master Commit: 8220d5265f1bbea9dfdaeec4f2d06d7fe24c0bc3 Parents: e5949c2 Author: Michael Armbrust mich...@databricks.com Authored: Thu Apr 16 21:49:26 2015 -0500 Committer: Reynold Xin r...@databricks.com Committed: Thu Apr 16 21:49:26 2015 -0500 -- .../main/scala/org/apache/spark/sql/DataFrame.scala | 14 ++ .../src/main/scala/org/apache/spark/sql/RDDApi.scala | 2 ++ .../scala/org/apache/spark/sql/DataFrameSuite.scala | 8 3 files changed, 24 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/8220d526/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala index 3235f85..17c21f6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala @@ -909,6 +909,20 @@ class DataFrame private[sql]( } /** + * Returns a new [[DataFrame]] that has exactly `numPartitions` partitions. + * Similar to coalesce defined on an [[RDD]], this operation results in a narrow dependency, e.g. + * if you go from 1000 partitions to 100 partitions, there will not be a shuffle, instead each of + * the 100 new partitions will claim 10 of the current partitions. 
+ * @group rdd + */ + override def coalesce(numPartitions: Int): DataFrame = { +sqlContext.createDataFrame( + queryExecution.toRdd.coalesce(numPartitions), + schema, + needsConversion = false) + } + + /** * Returns a new [[DataFrame]] that contains only the unique rows from this [[DataFrame]]. * @group dfops */ http://git-wip-us.apache.org/repos/asf/spark/blob/8220d526/sql/core/src/main/scala/org/apache/spark/sql/RDDApi.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/RDDApi.scala b/sql/core/src/main/scala/org/apache/spark/sql/RDDApi.scala index ba4373f..63dbab1 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/RDDApi.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/RDDApi.scala @@ -61,5 +61,7 @@ private[sql] trait RDDApi[T] { def repartition(numPartitions: Int): DataFrame + def coalesce(numPartitions: Int): DataFrame + def distinct: DataFrame } http://git-wip-us.apache.org/repos/asf/spark/blob/8220d526/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala -- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala index 44a7d1e..3250ab4 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala @@ -178,6 +178,14 @@ class DataFrameSuite extends QueryTest { testData.select('key).collect().toSeq) } + test(coalesce) { +assert(testData.select('key).coalesce(1).rdd.partitions.size === 1) + +checkAnswer( + testData.select('key).coalesce(1).select('key), + testData.select('key).collect().toSeq) + } + test(groupBy) { checkAnswer( testData2.groupBy(a).agg($a, sum($b)), - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-6899][SQL] Fix type mismatch when using codegen with Average on DecimalType
Repository: spark Updated Branches: refs/heads/master d96608674 - 1e43851d6 [SPARK-6899][SQL] Fix type mismatch when using codegen with Average on DecimalType JIRA https://issues.apache.org/jira/browse/SPARK-6899 Author: Liang-Chi Hsieh vii...@gmail.com Closes #5517 from viirya/fix_codegen_average and squashes the following commits: 8ae5f65 [Liang-Chi Hsieh] Add the case of DecimalType.Unlimited to Average. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1e43851d Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1e43851d Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1e43851d Branch: refs/heads/master Commit: 1e43851d6455f65b850ea0327d0e92f65395d23f Parents: d966086 Author: Liang-Chi Hsieh vii...@gmail.com Authored: Thu Apr 16 17:50:20 2015 -0700 Committer: Michael Armbrust mich...@databricks.com Committed: Thu Apr 16 17:50:20 2015 -0700 -- .../apache/spark/sql/catalyst/expressions/aggregates.scala | 2 +- .../test/scala/org/apache/spark/sql/DataFrameSuite.scala| 9 + 2 files changed, 10 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/1e43851d/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregates.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregates.scala index 14a8550..f3830c6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregates.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregates.scala @@ -326,7 +326,7 @@ case class Average(child: Expression) extends PartialAggregate with trees.UnaryN override def asPartial: SplitEvaluation = { child.dataType match { - case DecimalType.Fixed(_, _) = + case DecimalType.Fixed(_, _) | DecimalType.Unlimited = // Turn the child to unlimited decimals for calculation, before 
going back to fixed val partialSum = Alias(Sum(Cast(child, DecimalType.Unlimited)), PartialSum)() val partialCount = Alias(Count(child), PartialCount)() http://git-wip-us.apache.org/repos/asf/spark/blob/1e43851d/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala -- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala index 34b2cb0..44a7d1e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala @@ -537,4 +537,13 @@ class DataFrameSuite extends QueryTest { val df = TestSQLContext.createDataFrame(rowRDD, schema) df.rdd.collect() } + + test(SPARK-6899) { +val originalValue = TestSQLContext.conf.codegenEnabled +TestSQLContext.setConf(SQLConf.CODEGEN_ENABLED, true) +checkAnswer( + decimalData.agg(avg('a)), + Row(new java.math.BigDecimal(2.0))) +TestSQLContext.setConf(SQLConf.CODEGEN_ENABLED, originalValue.toString) + } } - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
svn commit: r8635 - /dev/spark/spark-1.2.2-rc1/
Author: pwendell Date: Thu Apr 16 22:58:43 2015 New Revision: 8635 Log: Adding Spark 1.2.2 RC1 Added: dev/spark/spark-1.2.2-rc1/ dev/spark/spark-1.2.2-rc1/spark-1.2.2-bin-cdh4.tgz (with props) dev/spark/spark-1.2.2-rc1/spark-1.2.2-bin-cdh4.tgz.asc (with props) dev/spark/spark-1.2.2-rc1/spark-1.2.2-bin-cdh4.tgz.md5 dev/spark/spark-1.2.2-rc1/spark-1.2.2-bin-cdh4.tgz.sha dev/spark/spark-1.2.2-rc1/spark-1.2.2-bin-hadoop1-scala2.11.tgz (with props) dev/spark/spark-1.2.2-rc1/spark-1.2.2-bin-hadoop1-scala2.11.tgz.asc (with props) dev/spark/spark-1.2.2-rc1/spark-1.2.2-bin-hadoop1-scala2.11.tgz.md5 dev/spark/spark-1.2.2-rc1/spark-1.2.2-bin-hadoop1-scala2.11.tgz.sha dev/spark/spark-1.2.2-rc1/spark-1.2.2-bin-hadoop1.tgz (with props) dev/spark/spark-1.2.2-rc1/spark-1.2.2-bin-hadoop1.tgz.asc (with props) dev/spark/spark-1.2.2-rc1/spark-1.2.2-bin-hadoop1.tgz.md5 dev/spark/spark-1.2.2-rc1/spark-1.2.2-bin-hadoop1.tgz.sha dev/spark/spark-1.2.2-rc1/spark-1.2.2-bin-hadoop2.3.tgz (with props) dev/spark/spark-1.2.2-rc1/spark-1.2.2-bin-hadoop2.3.tgz.asc (with props) dev/spark/spark-1.2.2-rc1/spark-1.2.2-bin-hadoop2.3.tgz.md5 dev/spark/spark-1.2.2-rc1/spark-1.2.2-bin-hadoop2.3.tgz.sha dev/spark/spark-1.2.2-rc1/spark-1.2.2.tgz (with props) dev/spark/spark-1.2.2-rc1/spark-1.2.2.tgz.asc (with props) dev/spark/spark-1.2.2-rc1/spark-1.2.2.tgz.md5 dev/spark/spark-1.2.2-rc1/spark-1.2.2.tgz.sha Added: dev/spark/spark-1.2.2-rc1/spark-1.2.2-bin-cdh4.tgz == Binary file - no diff available. Propchange: dev/spark/spark-1.2.2-rc1/spark-1.2.2-bin-cdh4.tgz -- svn:mime-type = application/x-gzip Added: dev/spark/spark-1.2.2-rc1/spark-1.2.2-bin-cdh4.tgz.asc == Binary file - no diff available. 
Propchange: dev/spark/spark-1.2.2-rc1/spark-1.2.2-bin-cdh4.tgz.asc -- svn:mime-type = application/pgp-signature Added: dev/spark/spark-1.2.2-rc1/spark-1.2.2-bin-cdh4.tgz.md5 == --- dev/spark/spark-1.2.2-rc1/spark-1.2.2-bin-cdh4.tgz.md5 (added) +++ dev/spark/spark-1.2.2-rc1/spark-1.2.2-bin-cdh4.tgz.md5 Thu Apr 16 22:58:43 2015 @@ -0,0 +1 @@ +spark-1.2.2-bin-cdh4.tgz: C2 67 26 E8 B8 EE 69 B2 BC 41 20 58 04 6E D4 CA Added: dev/spark/spark-1.2.2-rc1/spark-1.2.2-bin-cdh4.tgz.sha == --- dev/spark/spark-1.2.2-rc1/spark-1.2.2-bin-cdh4.tgz.sha (added) +++ dev/spark/spark-1.2.2-rc1/spark-1.2.2-bin-cdh4.tgz.sha Thu Apr 16 22:58:43 2015 @@ -0,0 +1,3 @@ +spark-1.2.2-bin-cdh4.tgz: 7D4D1D7F 4BA5AABD 1840FE52 BF00B964 461BB139 934C659B + 5302BCA7 E6F1BB5C 01078526 5DCCD7EA ED173EAB 6FEA8A61 + F4988E1F C1AB232D 3088BBAF 800C9E9D Added: dev/spark/spark-1.2.2-rc1/spark-1.2.2-bin-hadoop1-scala2.11.tgz == Binary file - no diff available. Propchange: dev/spark/spark-1.2.2-rc1/spark-1.2.2-bin-hadoop1-scala2.11.tgz -- svn:mime-type = application/x-gzip Added: dev/spark/spark-1.2.2-rc1/spark-1.2.2-bin-hadoop1-scala2.11.tgz.asc == Binary file - no diff available. 
Propchange: dev/spark/spark-1.2.2-rc1/spark-1.2.2-bin-hadoop1-scala2.11.tgz.asc -- svn:mime-type = application/pgp-signature Added: dev/spark/spark-1.2.2-rc1/spark-1.2.2-bin-hadoop1-scala2.11.tgz.md5 == --- dev/spark/spark-1.2.2-rc1/spark-1.2.2-bin-hadoop1-scala2.11.tgz.md5 (added) +++ dev/spark/spark-1.2.2-rc1/spark-1.2.2-bin-hadoop1-scala2.11.tgz.md5 Thu Apr 16 22:58:43 2015 @@ -0,0 +1,2 @@ +spark-1.2.2-bin-hadoop1-scala2.11.tgz: B0 FC 04 2C 6E 32 46 B1 F8 E4 CC 23 73 + BB 31 40 Added: dev/spark/spark-1.2.2-rc1/spark-1.2.2-bin-hadoop1-scala2.11.tgz.sha == --- dev/spark/spark-1.2.2-rc1/spark-1.2.2-bin-hadoop1-scala2.11.tgz.sha (added) +++ dev/spark/spark-1.2.2-rc1/spark-1.2.2-bin-hadoop1-scala2.11.tgz.sha Thu Apr 16 22:58:43 2015 @@ -0,0 +1,4 @@ +spark-1.2.2-bin-hadoop1-scala2.11.tgz: A8383D59 4046DCDA DE025F4E 943A667D + 9DB4B545 ED0C4FEA 0B7A9A6D DD9D3CA0 + 3CC41259 B29EBA5F DA85D414 2BFAFC85 +
spark git commit: [SPARK-6966][SQL] Use correct ClassLoader for JDBC Driver
Repository: spark Updated Branches: refs/heads/master 1e43851d6 - e5949c287 [SPARK-6966][SQL] Use correct ClassLoader for JDBC Driver Otherwise we cannot add jars with drivers after the fact. Author: Michael Armbrust mich...@databricks.com Closes #5543 from marmbrus/jdbcClassloader and squashes the following commits: d9930f3 [Michael Armbrust] fix imports 73d0614 [Michael Armbrust] [SPARK-6966][SQL] Use correct ClassLoader for JDBC Driver Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e5949c28 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e5949c28 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e5949c28 Branch: refs/heads/master Commit: e5949c287ed19e78b6eecc61c3e88a07ad452eb9 Parents: 1e43851 Author: Michael Armbrust mich...@databricks.com Authored: Thu Apr 16 17:59:49 2015 -0700 Committer: Michael Armbrust mich...@databricks.com Committed: Thu Apr 16 17:59:49 2015 -0700 -- .../src/main/scala/org/apache/spark/sql/jdbc/JDBCRelation.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/e5949c28/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRelation.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRelation.scala index 99b755c..5f48008 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRelation.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JDBCRelation.scala @@ -28,6 +28,7 @@ import org.apache.spark.sql.SQLContext import org.apache.spark.sql.catalyst.expressions.Row import org.apache.spark.sql.sources._ import org.apache.spark.sql.types.StructType +import org.apache.spark.util.Utils /** * Data corresponding to one partition of a JDBCRDD. 
@@ -99,7 +100,7 @@ private[sql] class DefaultSource extends RelationProvider { val upperBound = parameters.getOrElse(upperBound, null) val numPartitions = parameters.getOrElse(numPartitions, null) -if (driver != null) Class.forName(driver) +if (driver != null) Utils.getContextOrSparkClassLoader.loadClass(driver) if (partitionColumn != null (lowerBound == null || upperBound == null || numPartitions == null)) { - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
Git Push Summary
Repository: spark Updated Tags: refs/tags/v1.2.1-rc3 [deleted] b6eaf77d4 - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
Git Push Summary
Repository: spark Updated Tags: refs/tags/v1.3.0-rc2 [deleted] 3af26870e - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
Git Push Summary
Repository: spark Updated Tags: refs/tags/v1.3.0-snapshot1 [deleted] d97bfc6f2 - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
Git Push Summary
Repository: spark Updated Tags: refs/tags/v1.3.1-rc2 [deleted] 7c4473aa5 - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: SPARK-6927 [SQL] Sorting Error when codegen on
Repository: spark Updated Branches: refs/heads/master 04e44b37c - 5fe434335 SPARK-6927 [SQL] Sorting Error when codegen on Fix this error by adding BinaryType comparor in GenerateOrdering. JIRA https://issues.apache.org/jira/browse/SPARK-6927 Author: 云峤 chensong...@alibaba-inc.com Closes #5524 from kaka1992/fix-codegen-sort and squashes the following commits: d7e2afe [云峤] fix codegen sorting error Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5fe43433 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5fe43433 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5fe43433 Branch: refs/heads/master Commit: 5fe43433529346788e8c343d338a5b7dc169cf58 Parents: 04e44b3 Author: 云峤 chensong...@alibaba-inc.com Authored: Thu Apr 16 17:32:42 2015 -0700 Committer: Michael Armbrust mich...@databricks.com Committed: Thu Apr 16 17:32:42 2015 -0700 -- .../expressions/codegen/GenerateOrdering.scala | 14 +- .../org/apache/spark/sql/SQLQuerySuite.scala| 20 2 files changed, 33 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/5fe43433/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala index 0db29eb..fc2a2b6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala @@ -19,7 +19,7 @@ package org.apache.spark.sql.catalyst.expressions.codegen import org.apache.spark.Logging import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.types.{StringType, NumericType} +import org.apache.spark.sql.types.{BinaryType, StringType, 
NumericType} /** * Generates bytecode for an [[Ordering]] of [[Row Rows]] for a given set of @@ -43,6 +43,18 @@ object GenerateOrdering extends CodeGenerator[Seq[SortOrder], Ordering[Row]] wit val evalB = expressionEvaluator(order.child) val compare = order.child.dataType match { +case BinaryType = + q + val x = ${if (order.direction == Ascending) evalA.primitiveTerm else evalB.primitiveTerm} + val y = ${if (order.direction != Ascending) evalB.primitiveTerm else evalA.primitiveTerm} + var i = 0 + while (i x.length i y.length) { +val res = x(i).compareTo(y(i)) +if (res != 0) return res +i = i+1 + } + return x.length - y.length + case _: NumericType = q val comp = ${evalA.primitiveTerm} - ${evalB.primitiveTerm} http://git-wip-us.apache.org/repos/asf/spark/blob/5fe43433/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala -- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index d739e55..9e02e69 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -398,6 +398,26 @@ class SQLQuerySuite extends QueryTest with BeforeAndAfterAll { setConf(SQLConf.EXTERNAL_SORT, before.toString) } + test(SPARK-6927 sorting with codegen on) { +val externalbefore = conf.externalSortEnabled +val codegenbefore = conf.codegenEnabled +setConf(SQLConf.EXTERNAL_SORT, false) +setConf(SQLConf.CODEGEN_ENABLED, true) +sortTest() +setConf(SQLConf.EXTERNAL_SORT, externalbefore.toString) +setConf(SQLConf.CODEGEN_ENABLED, codegenbefore.toString) + } + + test(SPARK-6927 external sorting with codegen on) { +val externalbefore = conf.externalSortEnabled +val codegenbefore = conf.codegenEnabled +setConf(SQLConf.CODEGEN_ENABLED, true) +setConf(SQLConf.EXTERNAL_SORT, true) +sortTest() +setConf(SQLConf.EXTERNAL_SORT, externalbefore.toString) +setConf(SQLConf.CODEGEN_ENABLED, codegenbefore.toString) + } + 
test(limit) { checkAnswer( sql(SELECT * FROM testData LIMIT 10), - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-6934][Core] Use 'spark.akka.askTimeout' for the ask timeout
Repository: spark Updated Branches: refs/heads/master 3ae37b93a - ef3fb801a [SPARK-6934][Core] Use 'spark.akka.askTimeout' for the ask timeout Fixed my mistake in #4588 Author: zsxwing zsxw...@gmail.com Closes #5529 from zsxwing/SPARK-6934 and squashes the following commits: 9890b2d [zsxwing] Use 'spark.akka.askTimeout' for the ask timeout Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ef3fb801 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ef3fb801 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ef3fb801 Branch: refs/heads/master Commit: ef3fb801ae971656ed9cd1b0ab95bc5a1548adbd Parents: 3ae37b9 Author: zsxwing zsxw...@gmail.com Authored: Thu Apr 16 13:45:55 2015 -0500 Committer: Reynold Xin r...@databricks.com Committed: Thu Apr 16 13:45:55 2015 -0500 -- core/src/main/scala/org/apache/spark/rpc/RpcEnv.scala | 7 --- 1 file changed, 4 insertions(+), 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/ef3fb801/core/src/main/scala/org/apache/spark/rpc/RpcEnv.scala -- diff --git a/core/src/main/scala/org/apache/spark/rpc/RpcEnv.scala b/core/src/main/scala/org/apache/spark/rpc/RpcEnv.scala index e259867..f2c1c86 100644 --- a/core/src/main/scala/org/apache/spark/rpc/RpcEnv.scala +++ b/core/src/main/scala/org/apache/spark/rpc/RpcEnv.scala @@ -284,7 +284,7 @@ private[spark] abstract class RpcEndpointRef(@transient conf: SparkConf) private[this] val maxRetries = conf.getInt(spark.akka.num.retries, 3) private[this] val retryWaitMs = conf.getLong(spark.akka.retry.wait, 3000) - private[this] val defaultTimeout = conf.getLong(spark.akka.lookupTimeout, 30) seconds + private[this] val defaultAskTimeout = conf.getLong(spark.akka.askTimeout, 30) seconds /** * return the address for the [[RpcEndpointRef]] @@ -304,7 +304,8 @@ private[spark] abstract class RpcEndpointRef(@transient conf: SparkConf) * * This method only sends the message once and never retries. 
*/ - def sendWithReply[T: ClassTag](message: Any): Future[T] = sendWithReply(message, defaultTimeout) + def sendWithReply[T: ClassTag](message: Any): Future[T] = +sendWithReply(message, defaultAskTimeout) /** * Send a message to the corresponding [[RpcEndpoint.receiveAndReply)]] and return a `Future` to @@ -327,7 +328,7 @@ private[spark] abstract class RpcEndpointRef(@transient conf: SparkConf) * @tparam T type of the reply message * @return the reply message from the corresponding [[RpcEndpoint]] */ - def askWithReply[T: ClassTag](message: Any): T = askWithReply(message, defaultTimeout) + def askWithReply[T: ClassTag](message: Any): T = askWithReply(message, defaultAskTimeout) /** * Send a message to the corresponding [[RpcEndpoint.receive]] and get its result within a - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org