spark git commit: [SPARK-10470] [ML] ml.IsotonicRegressionModel.copy should set parent
Repository: spark
Updated Branches:
  refs/heads/branch-1.5 88a07d89e -> 34d417e8e

[SPARK-10470] [ML] ml.IsotonicRegressionModel.copy should set parent

A copied model must have the same parent, but ml.IsotonicRegressionModel.copy did not set the parent. This fixes it and adds a test case.

Author: Yanbo Liang

Closes #8637 from yanboliang/spark-10470.

(cherry picked from commit f7b55dbfc3343cad988e2490478fce1a11343c73)
Signed-off-by: Xiangrui Meng

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/34d417e8
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/34d417e8
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/34d417e8

Branch: refs/heads/branch-1.5
Commit: 34d417e8e947ed2c1884c772a6a5604c87840967
Parents: 88a07d8
Author: Yanbo Liang
Authored: Tue Sep 8 12:48:21 2015 -0700
Committer: Xiangrui Meng
Committed: Tue Sep 8 13:08:43 2015 -0700

--
 .../org/apache/spark/ml/regression/IsotonicRegression.scala      | 2 +-
 .../org/apache/spark/ml/regression/IsotonicRegressionSuite.scala | 5 +++++
 2 files changed, 6 insertions(+), 1 deletion(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/34d417e8/mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala
--
diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala
index d43a344..2ff500f 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala
@@ -203,7 +203,7 @@ class IsotonicRegressionModel private[ml] (
   def predictions: Vector = Vectors.dense(oldModel.predictions)
 
   override def copy(extra: ParamMap): IsotonicRegressionModel = {
-    copyValues(new IsotonicRegressionModel(uid, oldModel), extra)
+    copyValues(new IsotonicRegressionModel(uid, oldModel), extra).setParent(parent)
   }
 
   override def transform(dataset: DataFrame): DataFrame = {

http://git-wip-us.apache.org/repos/asf/spark/blob/34d417e8/mllib/src/test/scala/org/apache/spark/ml/regression/IsotonicRegressionSuite.scala
--
diff --git a/mllib/src/test/scala/org/apache/spark/ml/regression/IsotonicRegressionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/regression/IsotonicRegressionSuite.scala
index c0ab00b..59f4193 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/regression/IsotonicRegressionSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/regression/IsotonicRegressionSuite.scala
@@ -19,6 +19,7 @@ package org.apache.spark.ml.regression
 
 import org.apache.spark.SparkFunSuite
 import org.apache.spark.ml.param.ParamsSuite
+import org.apache.spark.ml.util.MLTestingUtils
 import org.apache.spark.mllib.linalg.Vectors
 import org.apache.spark.mllib.util.MLlibTestSparkContext
 import org.apache.spark.sql.{DataFrame, Row}
@@ -89,6 +90,10 @@ class IsotonicRegressionSuite extends SparkFunSuite with MLlibTestSparkContext {
     assert(ir.getFeatureIndex === 0)
 
     val model = ir.fit(dataset)
+
+    // copied model must have the same parent.
+    MLTestingUtils.checkCopy(model)
+
     model.transform(dataset)
      .select("label", "features", "prediction", "weight")
      .collect()
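[Editor's note] The `MLTestingUtils.checkCopy` call added above reduces to a one-assertion helper. A minimal sketch of that check (assuming only the `spark.ml` API shown in this diff, not the exact utility source):

```scala
import org.apache.spark.ml.Model
import org.apache.spark.ml.param.ParamMap

// A copied model must keep the estimator that produced it as its parent.
def checkCopy(model: Model[_]): Unit = {
  val copied = model.copy(ParamMap.empty).asInstanceOf[Model[_]]
  assert(copied.parent == model.parent, "copy() must preserve the parent estimator")
}
```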
spark git commit: [SPARK-10316] [SQL] respect nondeterministic expressions in PhysicalOperation
Repository: spark
Updated Branches:
  refs/heads/master 5b2192e84 -> 5fd57955e

[SPARK-10316] [SQL] respect nondeterministic expressions in PhysicalOperation

We do a lot of special handling for non-deterministic expressions in `Optimizer`. However, `PhysicalOperation` just collects all Projects and Filters and messes that order up. We should respect the operator order imposed by non-deterministic expressions in `PhysicalOperation`.

Author: Wenchen Fan

Closes #8486 from cloud-fan/fix.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5fd57955
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5fd57955
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5fd57955

Branch: refs/heads/master
Commit: 5fd57955ef477347408f68eb1cb6ad1881fdb6e0
Parents: 5b2192e
Author: Wenchen Fan
Authored: Tue Sep 8 12:05:41 2015 -0700
Committer: Michael Armbrust
Committed: Tue Sep 8 12:05:41 2015 -0700

--
 .../spark/sql/catalyst/planning/patterns.scala | 38 +---
 .../org/apache/spark/sql/DataFrameSuite.scala  | 12 +++
 2 files changed, 20 insertions(+), 30 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/5fd57955/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/planning/patterns.scala
--
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/planning/patterns.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/planning/patterns.scala
index e8abcd6..5353779 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/planning/patterns.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/planning/patterns.scala
@@ -17,8 +17,6 @@
 
 package org.apache.spark.sql.catalyst.planning
 
-import scala.annotation.tailrec
-
 import org.apache.spark.Logging
 import org.apache.spark.sql.catalyst.trees.TreeNodeRef
 import org.apache.spark.sql.catalyst.expressions._
@@ -26,27 +24,6 @@ import org.apache.spark.sql.catalyst.plans._
 import org.apache.spark.sql.catalyst.plans.logical._
 
 /**
- * A pattern that matches any number of filter operations on top of another relational operator.
- * Adjacent filter operators are collected and their conditions are broken up and returned as a
- * sequence of conjunctive predicates.
- *
- * @return A tuple containing a sequence of conjunctive predicates that should be used to filter the
- *         output and a relational operator.
- */
-object FilteredOperation extends PredicateHelper {
-  type ReturnType = (Seq[Expression], LogicalPlan)
-
-  def unapply(plan: LogicalPlan): Option[ReturnType] = Some(collectFilters(Nil, plan))
-
-  @tailrec
-  private def collectFilters(filters: Seq[Expression], plan: LogicalPlan): ReturnType = plan match {
-    case Filter(condition, child) =>
-      collectFilters(filters ++ splitConjunctivePredicates(condition), child)
-    case other => (filters, other)
-  }
-}
-
-/**
  * A pattern that matches any number of project or filter operations on top of another relational
  * operator. All filter operators are collected and their conditions are broken up and returned
  * together with the top project operator.
@@ -62,8 +39,9 @@ object PhysicalOperation extends PredicateHelper {
   }
 
   /**
-   * Collects projects and filters, in-lining/substituting aliases if necessary. Here are two
-   * examples for alias in-lining/substitution. Before:
+   * Collects all deterministic projects and filters, in-lining/substituting aliases if necessary.
+   * Here are two examples for alias in-lining/substitution.
+   * Before:
    * {{{
    *   SELECT c1 FROM (SELECT key AS c1 FROM t1) t2 WHERE c1 > 10
    *   SELECT c1 AS c2 FROM (SELECT key AS c1 FROM t1) t2 WHERE c1 > 10
@@ -74,15 +52,15 @@ object PhysicalOperation extends PredicateHelper {
    *   SELECT key AS c2 FROM t1 WHERE key > 10
    * }}}
    */
-  def collectProjectsAndFilters(plan: LogicalPlan):
+  private def collectProjectsAndFilters(plan: LogicalPlan):
       (Option[Seq[NamedExpression]], Seq[Expression], LogicalPlan, Map[Attribute, Expression]) =
     plan match {
-      case Project(fields, child) =>
+      case Project(fields, child) if fields.forall(_.deterministic) =>
        val (_, filters, other, aliases) = collectProjectsAndFilters(child)
        val substitutedFields = fields.map(substitute(aliases)).asInstanceOf[Seq[NamedExpression]]
        (Some(substitutedFields), filters, other, collectAliases(substitutedFields))

-      case Filter(condition, child) =>
+      case Filter(condition, child) if condition.deterministic =>
        val (fields, filters, other, aliases) = collectProjectsAndFilters(child)
        val
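[Editor's note] To see why the order matters, consider a non-deterministic projection: a filter written above it must not be collected and re-planned below it, or `rand()` may be re-evaluated with different results. A small illustration (hypothetical column names, 1.5-era DataFrame API, assumes a `sqlContext` in scope):

```scala
// The filter on r must stay above the projection that computes r: collecting
// both into a single Project+Filter pair would re-evaluate rand() during
// filtering, so the surviving rows would not match the r values in the output.
val df = sqlContext.range(100)
  .selectExpr("id", "rand() AS r") // non-deterministic project
  .filter("r > 0.5")               // must be planned above the project
```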
spark git commit: [SPARK-10441] [SQL] Save data correctly to json.
Repository: spark
Updated Branches:
  refs/heads/master f7b55dbfc -> 7a9dcbc91

[SPARK-10441] [SQL] Save data correctly to json.

https://issues.apache.org/jira/browse/SPARK-10441

Author: Yin Huai

Closes #8597 from yhuai/timestampJson.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7a9dcbc9
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7a9dcbc9
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7a9dcbc9

Branch: refs/heads/master
Commit: 7a9dcbc91d55dbc0cbf4812319bde65f4509b467
Parents: f7b55db
Author: Yin Huai
Authored: Tue Sep 8 14:10:12 2015 -0700
Committer: Michael Armbrust
Committed: Tue Sep 8 14:10:12 2015 -0700

--
 .../apache/spark/sql/RandomDataGenerator.scala  | 41 +-
 .../datasources/json/JacksonGenerator.scala     | 11 ++-
 .../datasources/json/JacksonParser.scala        | 31
 .../sql/hive/orc/OrcHadoopFsRelationSuite.scala |  8 ++
 .../sql/sources/JsonHadoopFsRelationSuite.scala |  8 ++
 .../sources/ParquetHadoopFsRelationSuite.scala  |  9 ++-
 .../SimpleTextHadoopFsRelationSuite.scala       | 19 -
 .../spark/sql/sources/SimpleTextRelation.scala  |  7 +-
 .../sql/sources/hadoopFsRelationSuites.scala    | 79
 9 files changed, 205 insertions(+), 8 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/7a9dcbc9/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala
--
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala
index 11e0c12..4025cbc 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala
@@ -23,6 +23,8 @@ import java.math.MathContext
 
 import scala.util.Random
 
+import org.apache.spark.sql.catalyst.CatalystTypeConverters
+import org.apache.spark.sql.catalyst.util.DateTimeUtils
 import org.apache.spark.sql.types._
 import org.apache.spark.unsafe.types.CalendarInterval
 
@@ -84,6 +86,7 @@ object RandomDataGenerator {
  * random data generator is defined for that data type. The generated values will use an external
  * representation of the data type; for example, the random generator for [[DateType]] will return
  * instances of [[java.sql.Date]] and the generator for [[StructType]] will return a [[Row]].
+ * For a [[UserDefinedType]] for a class X, an instance of class X is returned.
  *
  * @param dataType the type to generate values for
  * @param nullable whether null values should be generated
@@ -106,7 +109,22 @@ object RandomDataGenerator {
       })
     case BooleanType => Some(() => rand.nextBoolean())
     case DateType => Some(() => new java.sql.Date(rand.nextInt()))
-    case TimestampType => Some(() => new java.sql.Timestamp(rand.nextLong()))
+    case TimestampType =>
+      val generator =
+        () => {
+          var milliseconds = rand.nextLong() % 253402329599999L
+          // -62135740800000L is the number of milliseconds before January 1, 1970, 00:00:00 GMT
+          // for "0001-01-01 00:00:00.000000". We need to find a
+          // number that is greater or equals to this number as a valid timestamp value.
+          while (milliseconds < -62135740800000L) {
+            // 253402329599999L is the the number of milliseconds since
+            // January 1, 1970, 00:00:00 GMT for "9999-12-31 23:59:59.999999".
+            milliseconds = rand.nextLong() % 253402329599999L
+          }
+          // DateTimeUtils.toJavaTimestamp takes microsecond.
+          DateTimeUtils.toJavaTimestamp(milliseconds * 1000)
+        }
+      Some(generator)
     case CalendarIntervalType => Some(() => {
       val months = rand.nextInt(1000)
       val ns = rand.nextLong()
@@ -159,6 +177,27 @@ object RandomDataGenerator {
           None
         }
       }
+    case udt: UserDefinedType[_] => {
+      val maybeSqlTypeGenerator = forType(udt.sqlType, nullable, seed)
+      // Because random data generator at here returns scala value, we need to
+      // convert it to catalyst value to call udt's deserialize.
+      val toCatalystType = CatalystTypeConverters.createToCatalystConverter(udt.sqlType)
+
+      if (maybeSqlTypeGenerator.isDefined) {
+        val sqlTypeGenerator = maybeSqlTypeGenerator.get
+        val generator = () => {
+          val generatedScalaValue = sqlTypeGenerator.apply()
+          if (generatedScalaValue == null) {
+            null
+          } else {
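[Editor's note] The bounds in the new generator correspond to the smallest and largest timestamps that survive a round trip through JSON's string representation (years 0001 through 9999). A standalone sketch of the bounded-draw idea, reusing the patch's constants outside Spark:

```scala
import java.sql.Timestamp
import scala.util.Random

// Keep drawing until the value is a representable timestamp: at or above
// 0001-01-01 00:00:00 and (via the modulus) no later than the end of year 9999,
// both expressed in milliseconds relative to the Unix epoch.
val rand = new Random(42)
var millis = rand.nextLong() % 253402329599999L
while (millis < -62135740800000L) {
  millis = rand.nextLong() % 253402329599999L
}
val ts = new Timestamp(millis)
```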
spark git commit: [SPARK-10468] [MLLIB] Verify schema before Dataframe select API call
Repository: spark
Updated Branches:
  refs/heads/master 7a9dcbc91 -> e6f8d3686

[SPARK-10468] [MLLIB] Verify schema before Dataframe select API call

Loader.checkSchema was called to verify the schema after dataframe.select(...); schema verification should be done before dataframe.select(...).

Author: Vinod K C

Closes #8636 from vinodkc/fix_GaussianMixtureModel_load_verification.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e6f8d368
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e6f8d368
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e6f8d368

Branch: refs/heads/master
Commit: e6f8d3686016a305a747c5bcc85f46fd4c0cbe83
Parents: 7a9dcbc
Author: Vinod K C
Authored: Tue Sep 8 14:44:05 2015 -0700
Committer: Xiangrui Meng
Committed: Tue Sep 8 14:44:05 2015 -0700

--
 .../org/apache/spark/mllib/clustering/GaussianMixtureModel.scala | 3 +--
 mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala | 4 +---
 2 files changed, 2 insertions(+), 5 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/e6f8d368/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureModel.scala
--
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureModel.scala
index 7f6163e..a590219 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureModel.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureModel.scala
@@ -168,10 +168,9 @@ object GaussianMixtureModel extends Loader[GaussianMixtureModel] {
       val dataPath = Loader.dataPath(path)
       val sqlContext = new SQLContext(sc)
       val dataFrame = sqlContext.read.parquet(dataPath)
-      val dataArray = dataFrame.select("weight", "mu", "sigma").collect()
-
       // Check schema explicitly since erasure makes it hard to use match-case for checking.
       Loader.checkSchema[Data](dataFrame.schema)
+      val dataArray = dataFrame.select("weight", "mu", "sigma").collect()
 
       val (weights, gaussians) = dataArray.map {
         case Row(weight: Double, mu: Vector, sigma: Matrix) =>

http://git-wip-us.apache.org/repos/asf/spark/blob/e6f8d368/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala
--
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala
index 36b124c..58857c3 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala
@@ -590,12 +590,10 @@ object Word2VecModel extends Loader[Word2VecModel] {
       val dataPath = Loader.dataPath(path)
       val sqlContext = new SQLContext(sc)
       val dataFrame = sqlContext.read.parquet(dataPath)
-
-      val dataArray = dataFrame.select("word", "vector").collect()
-
       // Check schema explicitly since erasure makes it hard to use match-case for checking.
       Loader.checkSchema[Data](dataFrame.schema)
+      val dataArray = dataFrame.select("word", "vector").collect()
       val word2VecMap = dataArray.map(i => (i.getString(0), i.getSeq[Float](1).toArray)).toMap
       new Word2VecModel(word2VecMap)
     }
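[Editor's note] The reordering matters because `select()` on a missing column fails with a generic unresolved-attribute error, while the schema check reports the expected layout of the model file. A generic sketch of the check-first pattern (the helper below is illustrative, not the actual `Loader.checkSchema`):

```scala
import org.apache.spark.sql.types.StructType

// Validate that every expected field is present before projecting, so a stale
// or corrupt model file fails with a clear message instead of an analysis error.
def checkFields(expected: Seq[String], actual: StructType): Unit = {
  val missing = expected.filterNot(actual.fieldNames.contains)
  require(missing.isEmpty, s"Model data is missing fields: ${missing.mkString(", ")}")
}
```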
spark git commit: [SPARK-10492] [STREAMING] [DOCUMENTATION] Update Streaming documentation about rate limiting and backpressure
Repository: spark
Updated Branches:
  refs/heads/master e6f8d3686 -> 52b24a602

[SPARK-10492] [STREAMING] [DOCUMENTATION] Update Streaming documentation about rate limiting and backpressure

Author: Tathagata Das

Closes #8656 from tdas/SPARK-10492 and squashes the following commits:

986cdd6 [Tathagata Das] Added information on backpressure

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/52b24a60
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/52b24a60
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/52b24a60

Branch: refs/heads/master
Commit: 52b24a602ad615a7f6aa427aefb1c7444c05d298
Parents: e6f8d36
Author: Tathagata Das
Authored: Tue Sep 8 14:54:43 2015 -0700
Committer: Tathagata Das
Committed: Tue Sep 8 14:54:43 2015 -0700

--
 docs/configuration.md               | 13 +
 docs/streaming-programming-guide.md | 13 -
 2 files changed, 25 insertions(+), 1 deletion(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/52b24a60/docs/configuration.md
--
diff --git a/docs/configuration.md b/docs/configuration.md
index a2cc7a3..e287591 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -1434,6 +1434,19 @@ Apart from these, the following properties are also available, and may be useful
 <tr><th>Property Name</th><th>Default</th><th>Meaning</th></tr>
+<tr>
+  <td><code>spark.streaming.backpressure.enabled</code></td>
+  <td>false</td>
+  <td>
+    Enables or disables Spark Streaming's internal backpressure mechanism (since 1.5).
+    This enables the Spark Streaming to control the receiving rate based on the
+    current batch scheduling delays and processing times so that the system receives
+    only as fast as the system can process. Internally, this dynamically sets the
+    maximum receiving rate of receivers. This rate is upper bounded by the values
+    `spark.streaming.receiver.maxRate` and `spark.streaming.kafka.maxRatePerPartition`
+    if they are set (see below).
+  </td>
+</tr>
 <tr>
   <td><code>spark.streaming.blockInterval</code></td>
   <td>200ms</td>

http://git-wip-us.apache.org/repos/asf/spark/blob/52b24a60/docs/streaming-programming-guide.md
--
diff --git a/docs/streaming-programming-guide.md b/docs/streaming-programming-guide.md
index a1acf83..c751dbb 100644
--- a/docs/streaming-programming-guide.md
+++ b/docs/streaming-programming-guide.md
@@ -1807,7 +1807,7 @@ To run a Spark Streaming applications, you need to have the following.
     + *Mesos* - [Marathon](https://github.com/mesosphere/marathon) has been used to achieve this
       with Mesos.
 
-- *[Since Spark 1.2] Configuring write ahead logs* - Since Spark 1.2,
+- *Configuring write ahead logs* - Since Spark 1.2,
   we have introduced _write ahead logs_ for achieving strong
   fault-tolerance guarantees. If enabled, all the data received from a receiver gets written into
   a write ahead log in the configuration checkpoint directory. This prevents data loss on driver
@@ -1822,6 +1822,17 @@ To run a Spark Streaming applications, you need to have the following.
   stored in a replicated storage system. This can be done by setting the storage level for the
   input stream to `StorageLevel.MEMORY_AND_DISK_SER`.
 
+- *Setting the max receiving rate* - If the cluster resources is not large enough for the streaming
+  application to process data as fast as it is being received, the receivers can be rate limited
+  by setting a maximum rate limit in terms of records / sec.
+  See the [configuration parameters](configuration.html#spark-streaming)
+  `spark.streaming.receiver.maxRate` for receivers and `spark.streaming.kafka.maxRatePerPartition`
+  for Direct Kafka approach. In Spark 1.5, we have introduced a feature called *backpressure* that
+  eliminate the need to set this rate limit, as Spark Streaming automatically figures out the
+  rate limits and dynamically adjusts them if the processing conditions change. This backpressure
+  can be enabled by setting the [configuration parameter](configuration.html#spark-streaming)
+  `spark.streaming.backpressure.enabled` to `true`.
+
 ### Upgrading Application Code
 {:.no_toc}
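[Editor's note] For reference, wiring both knobs together from application code might look like this (a sketch; the app name, rate, and batch interval are placeholders):

```scala
import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}

// Enable dynamic backpressure; the static maxRate then acts only as an upper bound.
val conf = new SparkConf()
  .setAppName("BackpressureExample")
  .set("spark.streaming.backpressure.enabled", "true")
  .set("spark.streaming.receiver.maxRate", "10000") // records/sec cap, optional
val ssc = new StreamingContext(conf, Seconds(2))
```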
spark git commit: [SPARK-10492] [STREAMING] [DOCUMENTATION] Update Streaming documentation about rate limiting and backpressure
Repository: spark
Updated Branches:
  refs/heads/branch-1.5 7fd4674fc -> 63c72b93e

[SPARK-10492] [STREAMING] [DOCUMENTATION] Update Streaming documentation about rate limiting and backpressure

Author: Tathagata Das

Closes #8656 from tdas/SPARK-10492 and squashes the following commits:

986cdd6 [Tathagata Das] Added information on backpressure

(cherry picked from commit 52b24a602ad615a7f6aa427aefb1c7444c05d298)
Signed-off-by: Tathagata Das

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/63c72b93
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/63c72b93
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/63c72b93

Branch: refs/heads/branch-1.5
Commit: 63c72b93eb51685814543a39caf9a6d221e2583c
Parents: 7fd4674
Author: Tathagata Das
Authored: Tue Sep 8 14:54:43 2015 -0700
Committer: Tathagata Das
Committed: Tue Sep 8 14:54:54 2015 -0700

--
 docs/configuration.md               | 13 +
 docs/streaming-programming-guide.md | 13 -
 2 files changed, 25 insertions(+), 1 deletion(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/63c72b93/docs/configuration.md
--
diff --git a/docs/configuration.md b/docs/configuration.md
index 77c5cbc..353efdb 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -1438,6 +1438,19 @@ Apart from these, the following properties are also available, and may be useful
 <tr><th>Property Name</th><th>Default</th><th>Meaning</th></tr>
+<tr>
+  <td><code>spark.streaming.backpressure.enabled</code></td>
+  <td>false</td>
+  <td>
+    Enables or disables Spark Streaming's internal backpressure mechanism (since 1.5).
+    This enables the Spark Streaming to control the receiving rate based on the
+    current batch scheduling delays and processing times so that the system receives
+    only as fast as the system can process. Internally, this dynamically sets the
+    maximum receiving rate of receivers. This rate is upper bounded by the values
+    `spark.streaming.receiver.maxRate` and `spark.streaming.kafka.maxRatePerPartition`
+    if they are set (see below).
+  </td>
+</tr>
 <tr>
   <td><code>spark.streaming.blockInterval</code></td>
   <td>200ms</td>

http://git-wip-us.apache.org/repos/asf/spark/blob/63c72b93/docs/streaming-programming-guide.md
--
diff --git a/docs/streaming-programming-guide.md b/docs/streaming-programming-guide.md
index a1acf83..c751dbb 100644
--- a/docs/streaming-programming-guide.md
+++ b/docs/streaming-programming-guide.md
@@ -1807,7 +1807,7 @@ To run a Spark Streaming applications, you need to have the following.
     + *Mesos* - [Marathon](https://github.com/mesosphere/marathon) has been used to achieve this
       with Mesos.
 
-- *[Since Spark 1.2] Configuring write ahead logs* - Since Spark 1.2,
+- *Configuring write ahead logs* - Since Spark 1.2,
   we have introduced _write ahead logs_ for achieving strong
   fault-tolerance guarantees. If enabled, all the data received from a receiver gets written into
   a write ahead log in the configuration checkpoint directory. This prevents data loss on driver
@@ -1822,6 +1822,17 @@ To run a Spark Streaming applications, you need to have the following.
   stored in a replicated storage system. This can be done by setting the storage level for the
   input stream to `StorageLevel.MEMORY_AND_DISK_SER`.
 
+- *Setting the max receiving rate* - If the cluster resources is not large enough for the streaming
+  application to process data as fast as it is being received, the receivers can be rate limited
+  by setting a maximum rate limit in terms of records / sec.
+  See the [configuration parameters](configuration.html#spark-streaming)
+  `spark.streaming.receiver.maxRate` for receivers and `spark.streaming.kafka.maxRatePerPartition`
+  for Direct Kafka approach. In Spark 1.5, we have introduced a feature called *backpressure* that
+  eliminate the need to set this rate limit, as Spark Streaming automatically figures out the
+  rate limits and dynamically adjusts them if the processing conditions change. This backpressure
+  can be enabled by setting the [configuration parameter](configuration.html#spark-streaming)
+  `spark.streaming.backpressure.enabled` to `true`.
+
 ### Upgrading Application Code
 {:.no_toc}
spark git commit: [SPARK-10441] [SQL] [BRANCH-1.5] Save data correctly to json.
Repository: spark
Updated Branches:
  refs/heads/branch-1.5 34d417e8e -> 7fd4674fc

[SPARK-10441] [SQL] [BRANCH-1.5] Save data correctly to json.

https://issues.apache.org/jira/browse/SPARK-10441

This is the backport of #8597 for the 1.5 branch.

Author: Yin Huai

Closes #8655 from yhuai/timestampJson-1.5.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7fd4674f
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7fd4674f
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7fd4674f

Branch: refs/heads/branch-1.5
Commit: 7fd4674fc93102f88f961726a0a44006ba6a8140
Parents: 34d417e
Author: Yin Huai
Authored: Tue Sep 8 14:20:35 2015 -0700
Committer: Yin Huai
Committed: Tue Sep 8 14:20:35 2015 -0700

--
 .../apache/spark/sql/RandomDataGenerator.scala  | 41 +-
 .../datasources/json/JacksonGenerator.scala     | 11 ++-
 .../datasources/json/JacksonParser.scala        | 31
 .../sql/hive/orc/OrcHadoopFsRelationSuite.scala |  8 ++
 .../sql/sources/JsonHadoopFsRelationSuite.scala |  8 ++
 .../sources/ParquetHadoopFsRelationSuite.scala  |  9 ++-
 .../SimpleTextHadoopFsRelationSuite.scala       | 19 -
 .../spark/sql/sources/SimpleTextRelation.scala  |  7 +-
 .../sql/sources/hadoopFsRelationSuites.scala    | 79
 9 files changed, 205 insertions(+), 8 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/7fd4674f/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala
--
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala
index 11e0c12..4025cbc 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala
@@ -23,6 +23,8 @@ import java.math.MathContext
 
 import scala.util.Random
 
+import org.apache.spark.sql.catalyst.CatalystTypeConverters
+import org.apache.spark.sql.catalyst.util.DateTimeUtils
 import org.apache.spark.sql.types._
 import org.apache.spark.unsafe.types.CalendarInterval
 
@@ -84,6 +86,7 @@ object RandomDataGenerator {
  * random data generator is defined for that data type. The generated values will use an external
  * representation of the data type; for example, the random generator for [[DateType]] will return
  * instances of [[java.sql.Date]] and the generator for [[StructType]] will return a [[Row]].
+ * For a [[UserDefinedType]] for a class X, an instance of class X is returned.
  *
  * @param dataType the type to generate values for
  * @param nullable whether null values should be generated
@@ -106,7 +109,22 @@ object RandomDataGenerator {
       })
     case BooleanType => Some(() => rand.nextBoolean())
     case DateType => Some(() => new java.sql.Date(rand.nextInt()))
-    case TimestampType => Some(() => new java.sql.Timestamp(rand.nextLong()))
+    case TimestampType =>
+      val generator =
+        () => {
+          var milliseconds = rand.nextLong() % 253402329599999L
+          // -62135740800000L is the number of milliseconds before January 1, 1970, 00:00:00 GMT
+          // for "0001-01-01 00:00:00.000000". We need to find a
+          // number that is greater or equals to this number as a valid timestamp value.
+          while (milliseconds < -62135740800000L) {
+            // 253402329599999L is the the number of milliseconds since
+            // January 1, 1970, 00:00:00 GMT for "9999-12-31 23:59:59.999999".
+            milliseconds = rand.nextLong() % 253402329599999L
+          }
+          // DateTimeUtils.toJavaTimestamp takes microsecond.
+          DateTimeUtils.toJavaTimestamp(milliseconds * 1000)
+        }
+      Some(generator)
     case CalendarIntervalType => Some(() => {
       val months = rand.nextInt(1000)
       val ns = rand.nextLong()
@@ -159,6 +177,27 @@ object RandomDataGenerator {
           None
         }
       }
+    case udt: UserDefinedType[_] => {
+      val maybeSqlTypeGenerator = forType(udt.sqlType, nullable, seed)
+      // Because random data generator at here returns scala value, we need to
+      // convert it to catalyst value to call udt's deserialize.
+      val toCatalystType = CatalystTypeConverters.createToCatalystConverter(udt.sqlType)
+
+      if (maybeSqlTypeGenerator.isDefined) {
+        val sqlTypeGenerator = maybeSqlTypeGenerator.get
+        val generator = () => {
+          val generatedScalaValue = sqlTypeGenerator.apply()
+          if (generatedScalaValue == null) {
+            null
+          }
spark git commit: Docs small fixes
Repository: spark
Updated Branches:
  refs/heads/branch-1.5 37c5edf1c -> 88a07d89e

Docs small fixes

Author: Jacek Laskowski

Closes #8629 from jaceklaskowski/docs-fixes.

(cherry picked from commit 6ceed852ab716d8acc46ce90cba9cfcff6d3616f)
Signed-off-by: Sean Owen

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/88a07d89
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/88a07d89
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/88a07d89

Branch: refs/heads/branch-1.5
Commit: 88a07d89e91c139a65d3a2d46632500a93b615c3
Parents: 37c5edf
Author: Jacek Laskowski
Authored: Tue Sep 8 14:38:10 2015 +0100
Committer: Sean Owen
Committed: Tue Sep 8 14:38:19 2015 +0100

--
 docs/building-spark.md   | 23 +++
 docs/cluster-overview.md | 15 ---
 2 files changed, 19 insertions(+), 19 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/88a07d89/docs/building-spark.md
--
diff --git a/docs/building-spark.md b/docs/building-spark.md
index f133eb9..4db32cf 100644
--- a/docs/building-spark.md
+++ b/docs/building-spark.md
@@ -61,12 +61,13 @@ If you don't run this, you may see errors like the following:
 
 You can fix this by setting the `MAVEN_OPTS` variable as discussed before.
 
 **Note:**
-* *For Java 8 and above this step is not required.*
-* *If using `build/mvn` and `MAVEN_OPTS` were not already set, the script will automate this for you.*
+
+* For Java 8 and above this step is not required.
+* If using `build/mvn` with no `MAVEN_OPTS` set, the script will automate this for you.
 
 # Specifying the Hadoop Version
 
-Because HDFS is not protocol-compatible across versions, if you want to read from HDFS, you'll need to build Spark against the specific HDFS version in your environment. You can do this through the "hadoop.version" property. If unset, Spark will build against Hadoop 2.2.0 by default. Note that certain build profiles are required for particular Hadoop versions:
+Because HDFS is not protocol-compatible across versions, if you want to read from HDFS, you'll need to build Spark against the specific HDFS version in your environment. You can do this through the `hadoop.version` property. If unset, Spark will build against Hadoop 2.2.0 by default. Note that certain build profiles are required for particular Hadoop versions:
@@ -91,7 +92,7 @@ mvn -Dhadoop.version=1.2.1 -Phadoop-1 -DskipTests clean package
 mvn -Dhadoop.version=2.0.0-mr1-cdh4.2.0 -Phadoop-1 -DskipTests clean package
 {% endhighlight %}
 
-You can enable the "yarn" profile and optionally set the "yarn.version" property if it is different from "hadoop.version". Spark only supports YARN versions 2.2.0 and later.
+You can enable the `yarn` profile and optionally set the `yarn.version` property if it is different from `hadoop.version`. Spark only supports YARN versions 2.2.0 and later.
 
 Examples:
@@ -125,7 +126,7 @@ mvn -Pyarn -Phadoop-2.4 -Dhadoop.version=2.4.0 -Phive -Phive-thriftserver -Dskip
 # Building for Scala 2.11
 To produce a Spark package compiled with Scala 2.11, use the `-Dscala-2.11` property:
 
-    dev/change-scala-version.sh 2.11
+    ./dev/change-scala-version.sh 2.11
     mvn -Pyarn -Phadoop-2.4 -Dscala-2.11 -DskipTests clean package
 
 Spark does not yet support its JDBC component for Scala 2.11.
@@ -163,11 +164,9 @@ the `spark-parent` module).
 
 Thus, the full flow for running continuous-compilation of the `core` submodule may look more like:
 
-```
- $ mvn install
- $ cd core
- $ mvn scala:cc
-```
+    $ mvn install
+    $ cd core
+    $ mvn scala:cc
 
 # Building Spark with IntelliJ IDEA or Eclipse
@@ -193,11 +192,11 @@ then ship it over to the cluster. We are investigating the exact cause for this.
 
 # Packaging without Hadoop Dependencies for YARN
 
-The assembly jar produced by `mvn package` will, by default, include all of Spark's dependencies, including Hadoop and some of its ecosystem projects. On YARN deployments, this causes multiple versions of these to appear on executor classpaths: the version packaged in the Spark assembly and the version on each node, included with yarn.application.classpath. The `hadoop-provided` profile builds the assembly without including Hadoop-ecosystem projects, like ZooKeeper and Hadoop itself.
+The assembly jar produced by `mvn package` will, by default, include all of Spark's dependencies, including Hadoop and some of its ecosystem projects. On YARN deployments, this causes multiple versions of these to appear on executor classpaths: the version packaged in the Spark assembly and the version on each node, included with
spark git commit: [SPARK-10327] [SQL] Cache Table is not working while subquery has alias in its project list
Repository: spark
Updated Branches:
  refs/heads/master 52b24a602 -> d637a666d

[SPARK-10327] [SQL] Cache Table is not working while subquery has alias in its project list

```scala
import org.apache.spark.sql.hive.execution.HiveTableScan
sql("select key, value, key + 1 from src").registerTempTable("abc")
cacheTable("abc")

val sparkPlan = sql(
  """select a.key, b.key, c.key from
    |abc a join abc b on a.key=b.key
    |join abc c on a.key=c.key""".stripMargin).queryExecution.sparkPlan

assert(sparkPlan.collect { case e: InMemoryColumnarTableScan => e }.size === 3) // failed
assert(sparkPlan.collect { case e: HiveTableScan => e }.size === 0) // failed
```

The actual plan is:

```
== Parsed Logical Plan ==
'Project [unresolvedalias('a.key),unresolvedalias('b.key),unresolvedalias('c.key)]
 'Join Inner, Some(('a.key = 'c.key))
  'Join Inner, Some(('a.key = 'b.key))
   'UnresolvedRelation [abc], Some(a)
   'UnresolvedRelation [abc], Some(b)
  'UnresolvedRelation [abc], Some(c)

== Analyzed Logical Plan ==
key: int, key: int, key: int
Project [key#14,key#61,key#66]
 Join Inner, Some((key#14 = key#66))
  Join Inner, Some((key#14 = key#61))
   Subquery a
    Subquery abc
     Project [key#14,value#15,(key#14 + 1) AS _c2#16]
      MetastoreRelation default, src, None
   Subquery b
    Subquery abc
     Project [key#61,value#62,(key#61 + 1) AS _c2#58]
      MetastoreRelation default, src, None
  Subquery c
   Subquery abc
    Project [key#66,value#67,(key#66 + 1) AS _c2#63]
     MetastoreRelation default, src, None

== Optimized Logical Plan ==
Project [key#14,key#61,key#66]
 Join Inner, Some((key#14 = key#66))
  Project [key#14,key#61]
   Join Inner, Some((key#14 = key#61))
    Project [key#14]
     InMemoryRelation [key#14,value#15,_c2#16], true, 10000, StorageLevel(true, true, false, true, 1), (Project [key#14,value#15,(key#14 + 1) AS _c2#16]), Some(abc)
    Project [key#61]
     MetastoreRelation default, src, None
  Project [key#66]
   MetastoreRelation default, src, None

== Physical Plan ==
TungstenProject [key#14,key#61,key#66]
 BroadcastHashJoin [key#14], [key#66], BuildRight
  TungstenProject [key#14,key#61]
   BroadcastHashJoin [key#14], [key#61], BuildRight
    ConvertToUnsafe
     InMemoryColumnarTableScan [key#14], (InMemoryRelation [key#14,value#15,_c2#16], true, 10000, StorageLevel(true, true, false, true, 1), (Project [key#14,value#15,(key#14 + 1) AS _c2#16]), Some(abc))
    ConvertToUnsafe
     HiveTableScan [key#61], (MetastoreRelation default, src, None)
  ConvertToUnsafe
   HiveTableScan [key#66], (MetastoreRelation default, src, None)
```

Author: Cheng Hao

Closes #8494 from chenghao-intel/weird_cache.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d637a666
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d637a666
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d637a666

Branch: refs/heads/master
Commit: d637a666d5932002c8ce0bd23c06064fbfdc1c97
Parents: 52b24a6
Author: Cheng Hao
Authored: Tue Sep 8 16:16:50 2015 -0700
Committer: Michael Armbrust
Committed: Tue Sep 8 16:16:50 2015 -0700

--
 .../sql/catalyst/plans/logical/LogicalPlan.scala | 15 +++---
 .../org/apache/spark/sql/CachedTableSuite.scala  | 16
 2 files changed, 28 insertions(+), 3 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/d637a666/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala
--
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala
index 9bb466a..8f8747e 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala
@@ -135,16 +135,25 @@ abstract class LogicalPlan extends QueryPlan[LogicalPlan] with Logging {
   /** Args that have cleaned such that differences in expression id should not affect equality */
   protected lazy val cleanArgs: Seq[Any] = {
     val input = children.flatMap(_.output)
+    def cleanExpression(e: Expression) = e match {
+      case a: Alias =>
+        // As the root of the expression, Alias will always take an arbitrary exprId, we need
+        // to erase that for equality testing.
+        val cleanedExprId = Alias(a.child, a.name)(ExprId(-1), a.qualifiers)
+        BindReferences.bindReference(cleanedExprId, input, allowFailures = true)
+      case other => BindReferences.bindReference(other, input, allowFailures =
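[Editor's note] Isolated from the diff above, the key step is small: before plans are compared for a cache lookup, every top-level `Alias` is rewritten to a fixed `ExprId`, so two analyses of the same query (which mint different fresh ids) compare equal. A sketch of just that normalization:

```scala
import org.apache.spark.sql.catalyst.expressions.{Alias, ExprId, Expression}

// Fresh exprIds are assigned per analysis, so they must be erased before
// structural equality is used to match a query against the cache.
def normalizeAlias(e: Expression): Expression = e match {
  case a: Alias => Alias(a.child, a.name)(ExprId(-1), a.qualifiers)
  case other => other
}
```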
spark git commit: [HOTFIX] Fix build break caused by #8494
Repository: spark
Updated Branches:
  refs/heads/master d637a666d -> 2143d592c

[HOTFIX] Fix build break caused by #8494

Author: Michael Armbrust

Closes #8659 from marmbrus/testBuildBreak.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2143d592
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2143d592
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2143d592

Branch: refs/heads/master
Commit: 2143d592c802ec8f83a1eb5ce9b33ad8e48d7196
Parents: d637a66
Author: Michael Armbrust
Authored: Tue Sep 8 16:51:45 2015 -0700
Committer: Michael Armbrust
Committed: Tue Sep 8 16:51:45 2015 -0700

--
 .../src/test/scala/org/apache/spark/sql/CachedTableSuite.scala | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/2143d592/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala
--
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala
index 84e66b5..356d4ff 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/CachedTableSuite.scala
@@ -342,9 +342,9 @@ class CachedTableSuite extends QueryTest with SharedSQLContext {
   }
 
   test("SPARK-10327 Cache Table is not working while subquery has alias in its project list") {
-    ctx.sparkContext.parallelize((1, 1) :: (2, 2) :: Nil)
+    sparkContext.parallelize((1, 1) :: (2, 2) :: Nil)
       .toDF("key", "value").selectExpr("key", "value", "key+1").registerTempTable("abc")
-    ctx.cacheTable("abc")
+    sqlContext.cacheTable("abc")
 
     val sparkPlan = sql(
       """select a.key, b.key, c.key from
Git Push Summary
Repository: spark
Updated Tags:
  refs/tags/v1.2.0-snapshot0 [deleted] bc0987579
  refs/tags/v1.2.0-snapshot1 [deleted] 38c1fbd96
Git Push Summary
Repository: spark
Updated Tags:
  refs/tags/v1.3.1-rc1 [deleted] 0dcb5d9f3
  refs/tags/v1.3.1-rc2 [deleted] 7c4473aa5
Git Push Summary
Repository: spark
Updated Tags:
  refs/tags/v1.4.0-rc1 [deleted] 777a08166
  refs/tags/v1.4.0-rc2 [deleted] 03fb26a3e
  refs/tags/v1.4.0-rc3 [deleted] f2796816b
  refs/tags/v1.4.0-rc4 [deleted] d630f4d69
Git Push Summary
Repository: spark
Updated Tags:
  refs/tags/v0.9.1-rc1 [deleted] 20a2d247a
  refs/tags/v0.9.1-rc2 [deleted] 95d70498b
  refs/tags/v0.9.1-rc3 [deleted] 931ac8a0e
  refs/tags/v0.9.2-rc1 [deleted] 5a3020615
Git Push Summary
Repository: spark
Updated Tags:
  refs/tags/v1.4.1-rc1 [deleted] d0a5560ce
  refs/tags/v1.4.1-rc2 [deleted] 07b95c7ad
  refs/tags/v1.4.1-rc3 [deleted] 3e8ae3894
  refs/tags/v1.4.1-rc4 [deleted] dbaa5c294
spark git commit: [SPARK-9170] [SQL] Use OrcStructInspector to be case preserving when writing ORC files
Repository: spark
Updated Branches:
  refs/heads/master 6ceed852a -> 990c9f79c

[SPARK-9170] [SQL] Use OrcStructInspector to be case preserving when writing ORC files

JIRA: https://issues.apache.org/jira/browse/SPARK-9170

`StandardStructObjectInspector` will implicitly lowercase column names, but the Orc format doesn't have such a requirement. In fact, there is an `OrcStructInspector` specified for the Orc format, and we should use it when serializing rows to an Orc file. That makes writing ORC files case preserving.

Author: Liang-Chi Hsieh

Closes #7520 from viirya/use_orcstruct.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/990c9f79
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/990c9f79
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/990c9f79

Branch: refs/heads/master
Commit: 990c9f79c28db501018a0a3af446ff879962475d
Parents: 6ceed85
Author: Liang-Chi Hsieh
Authored: Tue Sep 8 23:07:34 2015 +0800
Committer: Cheng Lian
Committed: Tue Sep 8 23:07:34 2015 +0800

--
 .../apache/spark/sql/hive/orc/OrcRelation.scala | 47 +++-
 .../spark/sql/hive/orc/OrcQuerySuite.scala      | 14 ++
 2 files changed, 40 insertions(+), 21 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/990c9f79/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcRelation.scala
--
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcRelation.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcRelation.scala
index 4eeca9a..7e89109 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcRelation.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcRelation.scala
@@ -25,9 +25,9 @@ import com.google.common.base.Objects
 import org.apache.hadoop.conf.Configuration
 import org.apache.hadoop.fs.{FileStatus, Path}
 import org.apache.hadoop.hive.conf.HiveConf.ConfVars
-import org.apache.hadoop.hive.ql.io.orc.{OrcInputFormat, OrcOutputFormat, OrcSerde, OrcSplit}
-import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils
+import org.apache.hadoop.hive.ql.io.orc.{OrcInputFormat, OrcOutputFormat, OrcSerde, OrcSplit, OrcStruct}
+import org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector
+import org.apache.hadoop.hive.serde2.typeinfo.{TypeInfoUtils, StructTypeInfo}
 import org.apache.hadoop.io.{NullWritable, Writable}
 import org.apache.hadoop.mapred.{InputFormat => MapRedInputFormat, JobConf, OutputFormat => MapRedOutputFormat, RecordWriter, Reporter}
 import org.apache.hadoop.mapreduce.lib.input.FileInputFormat
@@ -89,21 +89,10 @@ private[orc] class OrcOutputWriter(
       TypeInfoUtils.getTypeInfoFromTypeString(
         HiveMetastoreTypes.toMetastoreType(dataSchema))
 
-    TypeInfoUtils
-      .getStandardJavaObjectInspectorFromTypeInfo(typeInfo)
-      .asInstanceOf[StructObjectInspector]
+    OrcStruct.createObjectInspector(typeInfo.asInstanceOf[StructTypeInfo])
+      .asInstanceOf[SettableStructObjectInspector]
   }
 
-  // Used to hold temporary `Writable` fields of the next row to be written.
-  private val reusableOutputBuffer = new Array[Any](dataSchema.length)
-
-  // Used to convert Catalyst values into Hadoop `Writable`s.
-  private val wrappers = structOI.getAllStructFieldRefs.asScala
-    .zip(dataSchema.fields.map(_.dataType))
-    .map { case (ref, dt) =>
-      wrapperFor(ref.getFieldObjectInspector, dt)
-    }.toArray
-
   // `OrcRecordWriter.close()` creates an empty file if no rows are written at all. We use this
   // flag to decide whether `OrcRecordWriter.close()` needs to be called.
   private var recordWriterInstantiated = false
@@ -127,16 +116,32 @@ private[orc] class OrcOutputWriter(
   override def write(row: Row): Unit =
     throw new UnsupportedOperationException("call writeInternal")
 
-  override protected[sql] def writeInternal(row: InternalRow): Unit = {
+  private def wrapOrcStruct(
+      struct: OrcStruct,
+      oi: SettableStructObjectInspector,
+      row: InternalRow): Unit = {
+    val fieldRefs = oi.getAllStructFieldRefs
     var i = 0
-    while (i < row.numFields) {
-      reusableOutputBuffer(i) = wrappers(i)(row.get(i, dataSchema(i).dataType))
+    while (i < fieldRefs.size) {
+      oi.setStructFieldData(
+        struct,
+        fieldRefs.get(i),
+        wrap(
+          row.get(i, dataSchema(i).dataType),
+          fieldRefs.get(i).getFieldObjectInspector,
+          dataSchema(i).dataType))
       i += 1
     }
+  }
+
+  val cachedOrcStruct = structOI.create().asInstanceOf[OrcStruct]
+
+  override protected[sql] def writeInternal(row:
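[Editor's note] The user-visible behavior can be checked with a write/read round trip. A sketch (hypothetical path; assumes a `HiveContext` named `sqlContext` in scope, since ORC support lives in the Hive module):

```scala
import sqlContext.implicits._

// Before this change, "ID" and "Name" came back lower-cased as "id" and "name".
val df = sqlContext.sparkContext.parallelize(Seq((1, "a"))).toDF("ID", "Name")
df.write.format("orc").save("/tmp/orc-case-test")
val readBack = sqlContext.read.format("orc").load("/tmp/orc-case-test")
assert(readBack.schema.fieldNames.sameElements(Array("ID", "Name")))
```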
spark git commit: [SPARK-10480] [ML] Fix ML.LinearRegressionModel.copy()
Repository: spark
Updated Branches:
  refs/heads/master 990c9f79c -> 5b2192e84

[SPARK-10480] [ML] Fix ML.LinearRegressionModel.copy()

This PR fixes two model `copy()`-related issues:

- [SPARK-10480](https://issues.apache.org/jira/browse/SPARK-10480): `ML.LinearRegressionModel.copy()` ignored the `extra` argument, so it did not take effect when users set it.
- [SPARK-10479](https://issues.apache.org/jira/browse/SPARK-10479): `ML.LogisticRegressionModel.copy()` should copy the model summary if available.

Author: Yanbo Liang

Closes #8641 from yanboliang/linear-regression-copy.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5b2192e8
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5b2192e8
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5b2192e8

Branch: refs/heads/master
Commit: 5b2192e846b843d8a0cb9427d19bb677431194a0
Parents: 990c9f7
Author: Yanbo Liang
Authored: Tue Sep 8 11:11:35 2015 -0700
Committer: Xiangrui Meng
Committed: Tue Sep 8 11:11:35 2015 -0700

--
 .../org/apache/spark/ml/classification/LogisticRegression.scala | 4 +++-
 .../scala/org/apache/spark/ml/regression/LinearRegression.scala | 2 +-
 2 files changed, 4 insertions(+), 2 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/5b2192e8/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
--
diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
index 21fbe38..a460262 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala
@@ -468,7 +468,9 @@ class LogisticRegressionModel private[ml] (
   }
 
   override def copy(extra: ParamMap): LogisticRegressionModel = {
-    copyValues(new LogisticRegressionModel(uid, weights, intercept), extra).setParent(parent)
+    val newModel = copyValues(new LogisticRegressionModel(uid, weights, intercept), extra)
+    if (trainingSummary.isDefined) newModel.setSummary(trainingSummary.get)
+    newModel.setParent(parent)
   }
 
   override protected def raw2prediction(rawPrediction: Vector): Double = {

http://git-wip-us.apache.org/repos/asf/spark/blob/5b2192e8/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala
--
diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala
index 884003e..e4602d3 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala
@@ -310,7 +310,7 @@ class LinearRegressionModel private[ml] (
   }
 
   override def copy(extra: ParamMap): LinearRegressionModel = {
-    val newModel = copyValues(new LinearRegressionModel(uid, weights, intercept))
+    val newModel = copyValues(new LinearRegressionModel(uid, weights, intercept), extra)
     if (trainingSummary.isDefined) newModel.setSummary(trainingSummary.get)
     newModel.setParent(parent)
   }
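[Editor's note] A regression test for the `extra` half of the fix can be as small as this (a sketch; assumes a fitted `model: LinearRegressionModel` in scope):

```scala
import org.apache.spark.ml.param.ParamMap

// A param supplied through `extra` must survive copy().
val copied = model.copy(ParamMap(model.predictionCol -> "predictionCopy"))
assert(copied.getPredictionCol == "predictionCopy")
assert(copied.parent == model.parent) // and the parent must carry over too
```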
svn commit: r10454 - /dev/spark/spark-1.5.0/ /release/spark/spark-1.5.0/
Author: rxin
Date: Wed Sep 9 05:48:33 2015
New Revision: 10454

Log:
Spark 1.5.0.

Added:
    release/spark/spark-1.5.0/
      - copied from r10453, dev/spark/spark-1.5.0/
Removed:
    dev/spark/spark-1.5.0/
svn commit: r10453 - /dev/spark/spark-1.5.0/
Author: rxin
Date: Wed Sep 9 05:40:42 2015
New Revision: 10453

Log:
Add spark-1.5.0

Added:
    dev/spark/spark-1.5.0/
    dev/spark/spark-1.5.0/spark-1.5.0-bin-cdh4.tgz   (with props)
    dev/spark/spark-1.5.0/spark-1.5.0-bin-cdh4.tgz.asc
    dev/spark/spark-1.5.0/spark-1.5.0-bin-cdh4.tgz.md5
    dev/spark/spark-1.5.0/spark-1.5.0-bin-cdh4.tgz.sha
    dev/spark/spark-1.5.0/spark-1.5.0-bin-hadoop1-scala2.11.tgz   (with props)
    dev/spark/spark-1.5.0/spark-1.5.0-bin-hadoop1-scala2.11.tgz.asc
    dev/spark/spark-1.5.0/spark-1.5.0-bin-hadoop1-scala2.11.tgz.md5
    dev/spark/spark-1.5.0/spark-1.5.0-bin-hadoop1-scala2.11.tgz.sha
    dev/spark/spark-1.5.0/spark-1.5.0-bin-hadoop1.tgz   (with props)
    dev/spark/spark-1.5.0/spark-1.5.0-bin-hadoop1.tgz.asc
    dev/spark/spark-1.5.0/spark-1.5.0-bin-hadoop1.tgz.md5
    dev/spark/spark-1.5.0/spark-1.5.0-bin-hadoop1.tgz.sha
    dev/spark/spark-1.5.0/spark-1.5.0-bin-hadoop2.3.tgz   (with props)
    dev/spark/spark-1.5.0/spark-1.5.0-bin-hadoop2.3.tgz.asc
    dev/spark/spark-1.5.0/spark-1.5.0-bin-hadoop2.3.tgz.md5
    dev/spark/spark-1.5.0/spark-1.5.0-bin-hadoop2.3.tgz.sha
    dev/spark/spark-1.5.0/spark-1.5.0-bin-hadoop2.4.tgz   (with props)
    dev/spark/spark-1.5.0/spark-1.5.0-bin-hadoop2.4.tgz.asc
    dev/spark/spark-1.5.0/spark-1.5.0-bin-hadoop2.4.tgz.md5
    dev/spark/spark-1.5.0/spark-1.5.0-bin-hadoop2.4.tgz.sha
    dev/spark/spark-1.5.0/spark-1.5.0-bin-hadoop2.6.tgz   (with props)
    dev/spark/spark-1.5.0/spark-1.5.0-bin-hadoop2.6.tgz.asc
    dev/spark/spark-1.5.0/spark-1.5.0-bin-hadoop2.6.tgz.md5
    dev/spark/spark-1.5.0/spark-1.5.0-bin-hadoop2.6.tgz.sha
    dev/spark/spark-1.5.0/spark-1.5.0-bin-without-hadoop.tgz   (with props)
    dev/spark/spark-1.5.0/spark-1.5.0-bin-without-hadoop.tgz.asc
    dev/spark/spark-1.5.0/spark-1.5.0-bin-without-hadoop.tgz.md5
    dev/spark/spark-1.5.0/spark-1.5.0-bin-without-hadoop.tgz.sha
    dev/spark/spark-1.5.0/spark-1.5.0.tgz   (with props)
    dev/spark/spark-1.5.0/spark-1.5.0.tgz.asc
    dev/spark/spark-1.5.0/spark-1.5.0.tgz.md5
    dev/spark/spark-1.5.0/spark-1.5.0.tgz.sha

Added: dev/spark/spark-1.5.0/spark-1.5.0-bin-cdh4.tgz
==
Binary file - no diff available.

Propchange: dev/spark/spark-1.5.0/spark-1.5.0-bin-cdh4.tgz
--
    svn:mime-type = application/octet-stream

Added: dev/spark/spark-1.5.0/spark-1.5.0-bin-cdh4.tgz.asc
==
--- dev/spark/spark-1.5.0/spark-1.5.0-bin-cdh4.tgz.asc (added)
+++ dev/spark/spark-1.5.0/spark-1.5.0-bin-cdh4.tgz.asc Wed Sep 9 05:40:42 2015
@@ -0,0 +1,11 @@
+-----BEGIN PGP SIGNATURE-----
+Version: GnuPG v2.0.14 (GNU/Linux)
+
+iQEcBAABAgAGBQJV5OGUAAoJEHxsEF/8jtCJ1e0IAKgZaKj15mKsmu9jr0NdAdTx
+Nkwg8qrUYEQcniqKepGImENc0jSIJEUqSIC93fWOvXiiQk4v3tRSTHq5qxE9d+d3
+D245yVuznhMDPELrmeEppEcZZC1xDbC/cwVR/7Uo/zCZ3tGk113wF+Du2nAF0/us
+U1zLgotmuWKWvAGpgqO3UkjA8oqaXYrkFRVzbTnTSfSdUqfNRzKppjkq4UPVpYVE
+kMl8CadMEIk2DdCEI9o4NIA3aKicz/kuftnNZaDhc0+v/xNSxPsAKntnyVrXtnUB
+qMemo9k98ge4+kEOIVk9V8wJKR0GzmpjPrY1sWMvrKxhHajT0HQm5wNCj8obqmM=
+=AZpH
+-----END PGP SIGNATURE-----

Added: dev/spark/spark-1.5.0/spark-1.5.0-bin-cdh4.tgz.md5
==
--- dev/spark/spark-1.5.0/spark-1.5.0-bin-cdh4.tgz.md5 (added)
+++ dev/spark/spark-1.5.0/spark-1.5.0-bin-cdh4.tgz.md5 Wed Sep 9 05:40:42 2015
@@ -0,0 +1 @@
+spark-1.5.0-bin-cdh4.tgz: F6 E1 BA 9D 2D 85 C9 A3 65 42 FD 48 1D 65 6C 66

Added: dev/spark/spark-1.5.0/spark-1.5.0-bin-cdh4.tgz.sha
==
--- dev/spark/spark-1.5.0/spark-1.5.0-bin-cdh4.tgz.sha (added)
+++ dev/spark/spark-1.5.0/spark-1.5.0-bin-cdh4.tgz.sha Wed Sep 9 05:40:42 2015
@@ -0,0 +1,3 @@
+spark-1.5.0-bin-cdh4.tgz: A11367BF 57EE705A 7FA77255 8AFDBB77 193CD07B 6CD1B63A
+                          49E84362 CB066B77 70334433 D2B58624 24F25339 080D89F0
+                          7D76B196 69C0359C B4223A26 7CB3523B

Added: dev/spark/spark-1.5.0/spark-1.5.0-bin-hadoop1-scala2.11.tgz
==
Binary file - no diff available.

Propchange: dev/spark/spark-1.5.0/spark-1.5.0-bin-hadoop1-scala2.11.tgz
--
    svn:mime-type = application/octet-stream

Added: dev/spark/spark-1.5.0/spark-1.5.0-bin-hadoop1-scala2.11.tgz.asc
==
--- dev/spark/spark-1.5.0/spark-1.5.0-bin-hadoop1-scala2.11.tgz.asc (added)
+++ dev/spark/spark-1.5.0/spark-1.5.0-bin-hadoop1-scala2.11.tgz.asc Wed Sep 9 05:40:42 2015
@@ -0,0 +1,11 @@
+-----BEGIN PGP SIGNATURE-----
+Version: GnuPG v2.0.14 (GNU/Linux)
+
+iQEcBAABAgAGBQJV5OE6AAoJEHxsEF/8jtCJGBEIAKCsYlfSpkVEcMPsTwRBwjGb
svn commit: r1701901 [2/3] - in /spark: ./ releases/_posts/ site/ site/graphx/ site/mllib/ site/news/ site/releases/ site/screencasts/ site/sql/ site/streaming/
Modified: spark/site/news/spark-summit-2013-is-a-wrap.html
URL: http://svn.apache.org/viewvc/spark/site/news/spark-summit-2013-is-a-wrap.html?rev=1701901&r1=1701900&r2=1701901&view=diff
==
--- spark/site/news/spark-summit-2013-is-a-wrap.html (original)
+++ spark/site/news/spark-summit-2013-is-a-wrap.html Wed Sep 9 04:51:23 2015
@@ -134,6 +134,9 @@
   Latest News
 
+  Spark 1.5.0 released
+  (Sep 09, 2015)
+
   Spark Summit Europe agenda posted
   (Sep 07, 2015)
 
@@ -143,9 +146,6 @@
   Spark Summit 2015 Videos Posted
   (Jun 29, 2015)
 
-  Spark 1.4.0 released
-  (Jun 11, 2015)
-
   Archive

Modified: spark/site/news/spark-summit-2014-videos-posted.html
URL: http://svn.apache.org/viewvc/spark/site/news/spark-summit-2014-videos-posted.html?rev=1701901&r1=1701900&r2=1701901&view=diff
==
--- spark/site/news/spark-summit-2014-videos-posted.html (original)
+++ spark/site/news/spark-summit-2014-videos-posted.html Wed Sep 9 04:51:23 2015
@@ -134,6 +134,9 @@
   Latest News
 
+  Spark 1.5.0 released
+  (Sep 09, 2015)
+
   Spark Summit Europe agenda posted
   (Sep 07, 2015)
 
@@ -143,9 +146,6 @@
   Spark Summit 2015 Videos Posted
   (Jun 29, 2015)
 
-  Spark 1.4.0 released
-  (Jun 11, 2015)
-
   Archive

Modified: spark/site/news/spark-summit-2015-videos-posted.html
URL: http://svn.apache.org/viewvc/spark/site/news/spark-summit-2015-videos-posted.html?rev=1701901&r1=1701900&r2=1701901&view=diff
==
--- spark/site/news/spark-summit-2015-videos-posted.html (original)
+++ spark/site/news/spark-summit-2015-videos-posted.html Wed Sep 9 04:51:23 2015
@@ -134,6 +134,9 @@
   Latest News
 
+  Spark 1.5.0 released
+  (Sep 09, 2015)
+
   Spark Summit Europe agenda posted
   (Sep 07, 2015)
 
@@ -143,9 +146,6 @@
   Spark Summit 2015 Videos Posted
   (Jun 29, 2015)
 
-  Spark 1.4.0 released
-  (Jun 11, 2015)
-
   Archive

Modified: spark/site/news/spark-summit-agenda-posted.html
URL: http://svn.apache.org/viewvc/spark/site/news/spark-summit-agenda-posted.html?rev=1701901&r1=1701900&r2=1701901&view=diff
==
--- spark/site/news/spark-summit-agenda-posted.html (original)
+++ spark/site/news/spark-summit-agenda-posted.html Wed Sep 9 04:51:23 2015
@@ -134,6 +134,9 @@
   Latest News
 
+  Spark 1.5.0 released
+  (Sep 09, 2015)
+
   Spark Summit Europe agenda posted
   (Sep 07, 2015)
 
@@ -143,9 +146,6 @@
   Spark Summit 2015 Videos Posted
   (Jun 29, 2015)
 
-  Spark 1.4.0 released
-  (Jun 11, 2015)
-
   Archive

Modified: spark/site/news/spark-summit-east-2015-videos-posted.html
URL: http://svn.apache.org/viewvc/spark/site/news/spark-summit-east-2015-videos-posted.html?rev=1701901&r1=1701900&r2=1701901&view=diff
==
--- spark/site/news/spark-summit-east-2015-videos-posted.html (original)
+++ spark/site/news/spark-summit-east-2015-videos-posted.html Wed Sep 9 04:51:23 2015
@@ -134,6 +134,9 @@
   Latest News
 
+  Spark 1.5.0 released
+  (Sep 09, 2015)
+
   Spark Summit Europe agenda posted
   (Sep 07, 2015)
 
@@ -143,9 +146,6 @@
   Spark Summit 2015 Videos Posted
   (Jun 29, 2015)
 
-  Spark 1.4.0 released
-  (Jun 11, 2015)
-
   Archive

Modified: spark/site/news/spark-summit-east-agenda-posted.html
URL: http://svn.apache.org/viewvc/spark/site/news/spark-summit-east-agenda-posted.html?rev=1701901&r1=1701900&r2=1701901&view=diff
==
--- spark/site/news/spark-summit-east-agenda-posted.html (original)
+++ spark/site/news/spark-summit-east-agenda-posted.html Wed Sep 9 04:51:23 2015
@@ -134,6 +134,9 @@
   Latest News
 
+  Spark 1.5.0 released
+  (Sep 09, 2015)
+
   Spark Summit Europe agenda posted
   (Sep 07, 2015)
 
@@ -143,9 +146,6 @@
   Spark Summit 2015 Videos Posted
   (Jun 29, 2015)
 
-  Spark 1.4.0 released
-  (Jun 11, 2015)
-
   Archive

Modified: spark/site/news/spark-summit-europe-agenda-posted.html
URL:
svn commit: r1701901 [3/3] - in /spark: ./ releases/_posts/ site/ site/graphx/ site/mllib/ site/news/ site/releases/ site/screencasts/ site/sql/ site/streaming/
Modified: spark/site/screencasts/1-first-steps-with-spark.html
URL: http://svn.apache.org/viewvc/spark/site/screencasts/1-first-steps-with-spark.html?rev=1701901&r1=1701900&r2=1701901&view=diff
==============================================================================
--- spark/site/screencasts/1-first-steps-with-spark.html (original)
+++ spark/site/screencasts/1-first-steps-with-spark.html Wed Sep  9 04:51:23 2015
@@ -134,6 +134,9 @@
       Latest News
+      Spark 1.5.0 released
+      (Sep 09, 2015)
+
       Spark Summit Europe agenda posted
       (Sep 07, 2015)
@@ -143,9 +146,6 @@
       Spark Summit 2015 Videos Posted
       (Jun 29, 2015)
-      Spark 1.4.0 released
-      (Jun 11, 2015)
-
       Archive

The identical sidebar hunks are applied to each of:

    spark/site/screencasts/2-spark-documentation-overview.html
    spark/site/screencasts/3-transformations-and-caching.html
    spark/site/screencasts/4-a-standalone-job-in-spark.html
    spark/site/screencasts/index.html
    spark/site/sql/index.html   (hunks at @@ -137,6 +137,9 @@ and @@ -146,9 +149,6 @@)

Modified: spark/site/streaming/index.html
URL: http://svn.apache.org/viewvc/spark/site/streaming/index.html?rev=1701901&r1=1701900&r2=1701901&view=diff
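[Editor's note: every hunk in r1701901 converges on the same "Latest News" sidebar fragment. A minimal sketch of that fragment after the commit follows; only the item text and dates come from the quoted hunks. The tags, class names, and href targets are assumptions (the digest strips the actual markup), so treat this as illustrative rather than as the site's real template.]

<!-- Sketch only: markup and hrefs are assumed, not taken from the diff. -->
<h5>Latest News</h5>
<ul class="list-unstyled">
  <li><a href="/news/spark-1-5-0-released.html">Spark 1.5.0 released</a>
      <span class="small">(Sep 09, 2015)</span></li>
  <li><a href="/news/spark-summit-europe-agenda-posted.html">Spark Summit Europe agenda posted</a>
      <span class="small">(Sep 07, 2015)</span></li>
  <!-- one or more items between the two hunks are elided in the diff context -->
  <li><a href="/news/spark-summit-2015-videos-posted.html">Spark Summit 2015 Videos Posted</a>
      <span class="small">(Jun 29, 2015)</span></li>
</ul>
<p class="small"><a href="/news/index.html">Archive</a></p>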
svn commit: r1701902 [1/2] - in /spark: ./ releases/_posts/ site/ site/graphx/ site/mllib/ site/news/ site/releases/ site/screencasts/ site/sql/ site/streaming/
Author: rxin
Date: Wed Sep  9 04:54:47 2015
New Revision: 1701902

URL: http://svn.apache.org/r1701902
Log:
Rolled back to pre-1.5.0

Removed:
    spark/releases/_posts/2015-09-09-spark-release-1-5-0.md
    spark/site/releases/spark-release-1-5-0.html
Modified:
    spark/downloads.md
    spark/site/community.html
    spark/site/documentation.html
    spark/site/downloads.html
    spark/site/examples.html
    spark/site/faq.html
    spark/site/graphx/index.html
    spark/site/index.html
    spark/site/mailing-lists.html
    spark/site/mllib/index.html
    spark/site/news/amp-camp-2013-registration-ope.html
    spark/site/news/announcing-the-first-spark-summit.html
    spark/site/news/fourth-spark-screencast-published.html
    spark/site/news/index.html
    spark/site/news/nsdi-paper.html
    spark/site/news/one-month-to-spark-summit-2015.html
    spark/site/news/proposals-open-for-spark-summit-east.html
    spark/site/news/registration-open-for-spark-summit-east.html
    spark/site/news/run-spark-and-shark-on-amazon-emr.html
    spark/site/news/spark-0-6-1-and-0-5-2-released.html
    spark/site/news/spark-0-6-2-released.html
    spark/site/news/spark-0-7-0-released.html
    spark/site/news/spark-0-7-2-released.html
    spark/site/news/spark-0-7-3-released.html
    spark/site/news/spark-0-8-0-released.html
    spark/site/news/spark-0-8-1-released.html
    spark/site/news/spark-0-9-0-released.html
    spark/site/news/spark-0-9-1-released.html
    spark/site/news/spark-0-9-2-released.html
    spark/site/news/spark-1-0-0-released.html
    spark/site/news/spark-1-0-1-released.html
    spark/site/news/spark-1-0-2-released.html
    spark/site/news/spark-1-1-0-released.html
    spark/site/news/spark-1-1-1-released.html
    spark/site/news/spark-1-2-0-released.html
    spark/site/news/spark-1-2-1-released.html
    spark/site/news/spark-1-2-2-released.html
    spark/site/news/spark-1-3-0-released.html
    spark/site/news/spark-1-4-0-released.html
    spark/site/news/spark-1-4-1-released.html
    spark/site/news/spark-accepted-into-apache-incubator.html
    spark/site/news/spark-and-shark-in-the-news.html
    spark/site/news/spark-becomes-tlp.html
    spark/site/news/spark-featured-in-wired.html
    spark/site/news/spark-mailing-lists-moving-to-apache.html
    spark/site/news/spark-meetups.html
    spark/site/news/spark-screencasts-published.html
    spark/site/news/spark-summit-2013-is-a-wrap.html
    spark/site/news/spark-summit-2014-videos-posted.html
    spark/site/news/spark-summit-2015-videos-posted.html
    spark/site/news/spark-summit-agenda-posted.html
    spark/site/news/spark-summit-east-2015-videos-posted.html
    spark/site/news/spark-summit-east-agenda-posted.html
    spark/site/news/spark-summit-europe-agenda-posted.html
    spark/site/news/spark-summit-europe.html
    spark/site/news/spark-tips-from-quantifind.html
    spark/site/news/spark-user-survey-and-powered-by-page.html
    spark/site/news/spark-version-0-6-0-released.html
    spark/site/news/spark-wins-daytona-gray-sort-100tb-benchmark.html
    spark/site/news/strata-exercises-now-available-online.html
    spark/site/news/submit-talks-to-spark-summit-2014.html
    spark/site/news/two-weeks-to-spark-summit-2014.html
    spark/site/news/video-from-first-spark-development-meetup.html
    spark/site/releases/spark-release-0-3.html
    spark/site/releases/spark-release-0-5-0.html
    spark/site/releases/spark-release-0-5-1.html
    spark/site/releases/spark-release-0-5-2.html
    spark/site/releases/spark-release-0-6-0.html
    spark/site/releases/spark-release-0-6-1.html
    spark/site/releases/spark-release-0-6-2.html
    spark/site/releases/spark-release-0-7-0.html
    spark/site/releases/spark-release-0-7-2.html
    spark/site/releases/spark-release-0-7-3.html
    spark/site/releases/spark-release-0-8-0.html
    spark/site/releases/spark-release-0-8-1.html
    spark/site/releases/spark-release-0-9-0.html
    spark/site/releases/spark-release-0-9-1.html
    spark/site/releases/spark-release-0-9-2.html
    spark/site/releases/spark-release-1-0-0.html
    spark/site/releases/spark-release-1-0-1.html
    spark/site/releases/spark-release-1-0-2.html
    spark/site/releases/spark-release-1-1-0.html
    spark/site/releases/spark-release-1-1-1.html
    spark/site/releases/spark-release-1-2-0.html
    spark/site/releases/spark-release-1-2-1.html
    spark/site/releases/spark-release-1-2-2.html
    spark/site/releases/spark-release-1-3-0.html
    spark/site/releases/spark-release-1-3-1.html
    spark/site/releases/spark-release-1-4-0.html
    spark/site/releases/spark-release-1-4-1.html
    spark/site/research.html
    spark/site/screencasts/1-first-steps-with-spark.html
    spark/site/screencasts/2-spark-documentation-overview.html
    spark/site/screencasts/3-transformations-and-caching.html
    spark/site/screencasts/4-a-standalone-job-in-spark.html
    spark/site/screencasts/index.html
    spark/site/sql/index.html
    spark/site/streaming/index.html
svn commit: r1701902 [2/2] - in /spark: ./ releases/_posts/ site/ site/graphx/ site/mllib/ site/news/ site/releases/ site/screencasts/ site/sql/ site/streaming/
Modified: spark/site/releases/spark-release-0-5-0.html
URL: http://svn.apache.org/viewvc/spark/site/releases/spark-release-0-5-0.html?rev=1701902&r1=1701901&r2=1701902&view=diff
==============================================================================
--- spark/site/releases/spark-release-0-5-0.html (original)
+++ spark/site/releases/spark-release-0-5-0.html Wed Sep  9 04:54:47 2015
@@ -134,9 +134,6 @@
       Latest News
-      Spark 1.5.0 released
-      (Sep 09, 2015)
-
       Spark Summit Europe agenda posted
       (Sep 07, 2015)
@@ -146,6 +143,9 @@
       Spark Summit 2015 Videos Posted
       (Jun 29, 2015)
+      Spark 1.4.0 released
+      (Jun 11, 2015)
+
       Archive

These hunks are the exact inverse of the r1701901 sidebar change: the "Spark 1.5.0 released (Sep 09, 2015)" item is dropped and the "Spark 1.4.0 released (Jun 11, 2015)" item is restored. The identical pair of hunks is applied to each of:

    spark/site/releases/spark-release-0-5-1.html
    spark/site/releases/spark-release-0-5-2.html
    spark/site/releases/spark-release-0-6-0.html
    spark/site/releases/spark-release-0-6-1.html
    spark/site/releases/spark-release-0-6-2.html

Modified: spark/site/releases/spark-release-0-7-0.html
URL: http://svn.apache.org/viewvc/spark/site/releases/spark-release-0-7-0.html?rev=1701902&r1=1701901&r2=1701902&view=diff
==============================================================================
---