spark git commit: [SPARK-10809][MLLIB] Single-document topicDistributions method for LocalLDAModel

2016-01-11 Thread jkbradley
Repository: spark
Updated Branches:
  refs/heads/master 4f8eefa36 -> bbea88852


[SPARK-10809][MLLIB] Single-document topicDistributions method for LocalLDAModel

jira: https://issues.apache.org/jira/browse/SPARK-10809

We could provide a single-document topicDistributions method for LocalLDAModel 
to allow quick queries that avoid RDD operations. Currently, the user must 
use an RDD of documents.

It also adds some missing asserts.
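
As a rough usage sketch (not part of the patch; `ldaModel` and the vocabulary size
are assumptions), the new method can be called on a single vector without building an RDD:

```scala
import org.apache.spark.mllib.linalg.{Vector, Vectors}

// Assumed: ldaModel is an already-trained LocalLDAModel with k topics.
// A single document as a term-count vector over a vocabulary of, say, 5 terms.
val doc: Vector = Vectors.sparse(5, Array(0, 3), Array(2.0, 1.0))

// New in this patch: a one-off query needs no RDD.
val theta: Vector = ldaModel.topicDistribution(doc)   // length-k topic mixture ("theta")

// The pre-existing batch path still goes through an RDD of (id, document) pairs:
// val thetas = ldaModel.topicDistributions(sc.parallelize(Seq((0L, doc))))
```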

Author: Yuhao Yang 

Closes #9484 from hhbyyh/ldaTopicPre.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/bbea8885
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/bbea8885
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/bbea8885

Branch: refs/heads/master
Commit: bbea88852ce6a3127d071ca40dbca2d042f9fbcf
Parents: 4f8eefa
Author: Yuhao Yang 
Authored: Mon Jan 11 14:55:44 2016 -0800
Committer: Joseph K. Bradley 
Committed: Mon Jan 11 14:55:44 2016 -0800

--
 .../spark/mllib/clustering/LDAModel.scala   | 26 
 .../spark/mllib/clustering/LDASuite.scala   | 15 ---
 2 files changed, 38 insertions(+), 3 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/bbea8885/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala
--
diff --git 
a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala 
b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala
index 2fce3ff..b30ecb8 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala
@@ -388,6 +388,32 @@ class LocalLDAModel private[spark] (
   }
 
   /**
+   * Predicts the topic mixture distribution for a document (often called "theta" in the
+   * literature).  Returns a vector of zeros for an empty document.
+   *
+   * Note: this is intended for quick queries on a single document. For batches of documents,
+   * use [[topicDistributions()]] to avoid per-document overhead.
+   *
+   * @param document document to predict topic mixture distributions for
+   * @return topic mixture distribution for the document
+   */
+  @Since("2.0.0")
+  def topicDistribution(document: Vector): Vector = {
+    val expElogbeta = exp(LDAUtils.dirichletExpectation(topicsMatrix.toBreeze.toDenseMatrix.t).t)
+    if (document.numNonzeros == 0) {
+      Vectors.zeros(this.k)
+    } else {
+      val (gamma, _) = OnlineLDAOptimizer.variationalTopicInference(
+        document,
+        expElogbeta,
+        this.docConcentration.toBreeze,
+        gammaShape,
+        this.k)
+      Vectors.dense(normalize(gamma, 1.0).toArray)
+    }
+  }
+
+  /**
   * Java-friendly version of [[topicDistributions]]
   */
   @Since("1.4.1")

http://git-wip-us.apache.org/repos/asf/spark/blob/bbea8885/mllib/src/test/scala/org/apache/spark/mllib/clustering/LDASuite.scala
--
diff --git 
a/mllib/src/test/scala/org/apache/spark/mllib/clustering/LDASuite.scala 
b/mllib/src/test/scala/org/apache/spark/mllib/clustering/LDASuite.scala
index faef60e..ea23196 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/clustering/LDASuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/clustering/LDASuite.scala
@@ -366,7 +366,8 @@ class LDASuite extends SparkFunSuite with MLlibTestSparkContext {
       (0, 0.99504), (1, 0.99504),
       (1, 0.99504), (1, 0.99504))
 
-    val actualPredictions = ldaModel.topicDistributions(docs).map { case (id, topics) =>
+    val actualPredictions = ldaModel.topicDistributions(docs).cache()
+    val topTopics = actualPredictions.map { case (id, topics) =>
       // convert results to expectedPredictions format, which only has highest probability topic
       val topicsBz = topics.toBreeze.toDenseVector
       (id, (argmax(topicsBz), max(topicsBz)))
@@ -374,9 +375,17 @@ class LDASuite extends SparkFunSuite with MLlibTestSparkContext {
       .values
       .collect()
 
-    expectedPredictions.zip(actualPredictions).forall { case (expected, actual) =>
-      expected._1 === actual._1 && (expected._2 ~== actual._2 relTol 1E-3D)
+    expectedPredictions.zip(topTopics).foreach { case (expected, actual) =>
+      assert(expected._1 === actual._1 && (expected._2 ~== actual._2 relTol 1E-3D))
     }
+
+    docs.collect()
+      .map(doc => ldaModel.topicDistribution(doc._2))
+      .zip(actualPredictions.map(_._2).collect())
+      .foreach { case (single, batch) =>
+        assert(single ~== batch relTol 1E-3D)
+      }
+    actualPredictions.unpersist()
   }
 
   test("OnlineLDAOptimizer with asymmetric prior") {



spark git commit: [SPARK-12758][SQL] add note to Spark SQL Migration guide about TimestampType casting

2016-01-11 Thread marmbrus
Repository: spark
Updated Branches:
  refs/heads/master a44991453 -> a767ee8a0


[SPARK-12758][SQL] add note to Spark SQL Migration guide about TimestampType 
casting

Warning users about casting changes.
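
As a hedged illustration of the behavioral change being documented (not part of the
patch; exact output depends on the session time zone and the `sqlContext` entry point
is assumed to be available):

```scala
// Casting an integral value to a timestamp.
// Spark 1.5 interpreted the value as microseconds since the epoch;
// from Spark 1.6 it is interpreted as seconds, matching Hive 1.2.
sqlContext.sql("SELECT CAST(1451606400 AS TIMESTAMP)").show()
// 1.6+: 2016-01-01 00:00:00 (UTC)        -- value read as seconds
// 1.5 : 1970-01-01 00:24:11.6064 (UTC)   -- same value read as microseconds
```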

Author: Brandon Bradley 

Closes #10708 from blbradley/spark-12758.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a767ee8a
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a767ee8a
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a767ee8a

Branch: refs/heads/master
Commit: a767ee8a0599f5482717493a3298413c65d8ff89
Parents: a449914
Author: Brandon Bradley 
Authored: Mon Jan 11 14:21:50 2016 -0800
Committer: Michael Armbrust 
Committed: Mon Jan 11 14:21:50 2016 -0800

--
 docs/sql-programming-guide.md | 5 +
 1 file changed, 5 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/a767ee8a/docs/sql-programming-guide.md
--
diff --git a/docs/sql-programming-guide.md b/docs/sql-programming-guide.md
index b058833..bc89c78 100644
--- a/docs/sql-programming-guide.md
+++ b/docs/sql-programming-guide.md
@@ -2151,6 +2151,11 @@ options.
  ...
{% endhighlight %}
 
+ - From Spark 1.6, LongType casts to TimestampType expect seconds instead of 
microseconds. This
+   change was made to match the behavior of Hive 1.2 for more consistent type 
casting to TimestampType
+   from numeric types. See 
[SPARK-11724](https://issues.apache.org/jira/browse/SPARK-11724) for
+   details.
+
 ## Upgrading From Spark SQL 1.4 to 1.5
 
  - Optimized execution using manually managed memory (Tungsten) is now enabled 
by default, along with


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org



spark git commit: [SPARK-12576][SQL] Enable expression parsing in CatalystQl

2016-01-11 Thread rxin
Repository: spark
Updated Branches:
  refs/heads/master bbea88852 -> fe9eb0b0c


[SPARK-12576][SQL] Enable expression parsing in CatalystQl

The PR allows us to use the new SQL parser to parse SQL expressions such as: 
```1 + sin(x*x)```
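
A rough sketch of how the new entry point from the diff below might be exercised
(CatalystQl is a private[sql] internal class, so this assumes code compiled inside
the org.apache.spark.sql package, e.g. a test suite such as CatalystQlSuite):

```scala
import org.apache.spark.sql.catalyst.CatalystQl

val parser = new CatalystQl()
val expr = parser.parseExpression("1 + sin(x * x)")
// expr is an unresolved Catalyst Expression tree; analysis/resolution happens later.
println(expr)
```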

We enable this functionality in this PR, but we will not start using this 
actively yet. This will be done as soon as we have reached grammar parity with 
the existing parser stack.

cc rxin

Author: Herman van Hovell 

Closes #10649 from hvanhovell/SPARK-12576.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/fe9eb0b0
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/fe9eb0b0
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/fe9eb0b0

Branch: refs/heads/master
Commit: fe9eb0b0ce397aeb40a32f8231d2ce8c17d7a609
Parents: bbea888
Author: Herman van Hovell 
Authored: Mon Jan 11 16:29:37 2016 -0800
Committer: Reynold Xin 
Committed: Mon Jan 11 16:29:37 2016 -0800

--
 .../sql/catalyst/parser/SelectClauseParser.g|   7 +
 .../apache/spark/sql/catalyst/CatalystQl.scala  |  59 +---
 .../spark/sql/catalyst/parser/ParseDriver.scala |  24 ++-
 .../spark/sql/catalyst/CatalystQlSuite.scala| 151 +--
 .../spark/sql/hive/ExtendedHiveQlParser.scala   |   2 +-
 .../spark/sql/hive/HiveMetastoreCatalog.scala   |   4 +-
 .../org/apache/spark/sql/hive/HiveQl.scala  |  19 +--
 .../spark/sql/hive/ErrorPositionSuite.scala |   5 +-
 .../org/apache/spark/sql/hive/HiveQlSuite.scala |   2 +-
 9 files changed, 217 insertions(+), 56 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/fe9eb0b0/sql/catalyst/src/main/antlr3/org/apache/spark/sql/catalyst/parser/SelectClauseParser.g
--
diff --git 
a/sql/catalyst/src/main/antlr3/org/apache/spark/sql/catalyst/parser/SelectClauseParser.g
 
b/sql/catalyst/src/main/antlr3/org/apache/spark/sql/catalyst/parser/SelectClauseParser.g
index 2d2bafb..f18b6ec 100644
--- 
a/sql/catalyst/src/main/antlr3/org/apache/spark/sql/catalyst/parser/SelectClauseParser.g
+++ 
b/sql/catalyst/src/main/antlr3/org/apache/spark/sql/catalyst/parser/SelectClauseParser.g
@@ -131,6 +131,13 @@ selectItem
 :
 (tableAllColumns) => tableAllColumns -> ^(TOK_SELEXPR tableAllColumns)
 |
+namedExpression
+;
+
+namedExpression
+@init { gParent.pushMsg("select named expression", state); }
+@after { gParent.popMsg(state); }
+:
 ( expression
   ((KW_AS? identifier) | (KW_AS LPAREN identifier (COMMA identifier)* 
RPAREN))?
 ) -> ^(TOK_SELEXPR expression identifier*)

http://git-wip-us.apache.org/repos/asf/spark/blob/fe9eb0b0/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystQl.scala
--
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystQl.scala 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystQl.scala
index 2e3cc0b..c87b6c8 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystQl.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/CatalystQl.scala
@@ -30,6 +30,12 @@ import org.apache.spark.sql.types._
 import org.apache.spark.unsafe.types.CalendarInterval
 import org.apache.spark.util.random.RandomSampler
 
+private[sql] object CatalystQl {
+  val parser = new CatalystQl
+  def parseExpression(sql: String): Expression = parser.parseExpression(sql)
+  def parseTableIdentifier(sql: String): TableIdentifier = 
parser.parseTableIdentifier(sql)
+}
+
 /**
  * This class translates a HQL String to a Catalyst [[LogicalPlan]] or 
[[Expression]].
  */
@@ -41,16 +47,13 @@ private[sql] class CatalystQl(val conf: ParserConf = 
SimpleParserConf()) {
 }
   }
 
-
   /**
-   * Returns the AST for the given SQL string.
+   * The safeParse method allows a user to focus on the parsing/AST 
transformation logic. This
+   * method will take care of possible errors during the parsing process.
*/
-  protected def getAst(sql: String): ASTNode = ParseDriver.parse(sql, conf)
-
-  /** Creates LogicalPlan for a given HiveQL string. */
-  def createPlan(sql: String): LogicalPlan = {
+  protected def safeParse[T](sql: String, ast: ASTNode)(toResult: ASTNode => 
T): T = {
 try {
-  createPlan(sql, ParseDriver.parse(sql, conf))
+  toResult(ast)
 } catch {
   case e: MatchError => throw e
   case e: AnalysisException => throw e
@@ -58,26 +61,39 @@ private[sql] class CatalystQl(val conf: ParserConf = 
SimpleParserConf()) {
 throw new AnalysisException(e.getMessage)
   case e: NotImplementedError =>
 throw new AnalysisException(
-  s"""
- |Unsupported language 

spark git commit: [SPARK-12758][SQL] add note to Spark SQL Migration guide about TimestampType casting

2016-01-11 Thread marmbrus
Repository: spark
Updated Branches:
  refs/heads/branch-1.6 3b32aa9e2 -> dd2cf64f3


[SPARK-12758][SQL] add note to Spark SQL Migration guide about TimestampType 
casting

Warning users about casting changes.

Author: Brandon Bradley 

Closes #10708 from blbradley/spark-12758.

(cherry picked from commit a767ee8a0599f5482717493a3298413c65d8ff89)
Signed-off-by: Michael Armbrust 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/dd2cf64f
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/dd2cf64f
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/dd2cf64f

Branch: refs/heads/branch-1.6
Commit: dd2cf64f300ec42802dbea38b95047842de81870
Parents: 3b32aa9
Author: Brandon Bradley 
Authored: Mon Jan 11 14:21:50 2016 -0800
Committer: Michael Armbrust 
Committed: Mon Jan 11 14:22:15 2016 -0800

--
 docs/sql-programming-guide.md | 5 +
 1 file changed, 5 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/dd2cf64f/docs/sql-programming-guide.md
--
diff --git a/docs/sql-programming-guide.md b/docs/sql-programming-guide.md
index b058833..bc89c78 100644
--- a/docs/sql-programming-guide.md
+++ b/docs/sql-programming-guide.md
@@ -2151,6 +2151,11 @@ options.
  ...
{% endhighlight %}
 
+ - From Spark 1.6, LongType casts to TimestampType expect seconds instead of 
microseconds. This
+   change was made to match the behavior of Hive 1.2 for more consistent type 
casting to TimestampType
+   from numeric types. See 
[SPARK-11724](https://issues.apache.org/jira/browse/SPARK-11724) for
+   details.
+
 ## Upgrading From Spark SQL 1.4 to 1.5
 
  - Optimized execution using manually managed memory (Tungsten) is now enabled 
by default, along with


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org



spark git commit: [SPARK-12685][MLLIB] word2vec trainWordsCount gets overflow

2016-01-11 Thread jkbradley
Repository: spark
Updated Branches:
  refs/heads/master ee4ee02b8 -> 4f8eefa36


[SPARK-12685][MLLIB] word2vec trainWordsCount gets overflow

jira: https://issues.apache.org/jira/browse/SPARK-12685
the log of `word2vec` reports
trainWordsCount = -785727483
during computation over a large dataset.

The priority was updated because the overflow affects the computation process, e.g. the 
learning-rate update:
`alpha = learningRate * (1 - numPartitions * wordCount.toDouble / (trainWordsCount + 1))`
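
A self-contained sketch (not from the patch) of why widening the accumulator matters:
summing large per-word counts into an `Int` wraps around, while a `Long` does not.

```scala
// 500 vocabulary entries with 5 million occurrences each: the true total is 2.5e9,
// which exceeds Int.MaxValue (~2.15e9) and wraps to a negative number.
val counts = Array.fill(500)(5000000)
val asInt  = counts.foldLeft(0)(_ + _)    // overflows, prints a negative value
val asLong = counts.foldLeft(0L)(_ + _)   // 2500000000, as expected
println(s"Int sum = $asInt, Long sum = $asLong")
```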

Author: Yuhao Yang 

Closes #10627 from hhbyyh/w2voverflow.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4f8eefa3
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4f8eefa3
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4f8eefa3

Branch: refs/heads/master
Commit: 4f8eefa36bb90812aac61ac7a762c9452de666bf
Parents: ee4ee02
Author: Yuhao Yang 
Authored: Mon Jan 11 14:48:35 2016 -0800
Committer: Joseph K. Bradley 
Committed: Mon Jan 11 14:48:35 2016 -0800

--
 .../main/scala/org/apache/spark/mllib/feature/Word2Vec.scala | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/4f8eefa3/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala
--
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala 
b/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala
index a7e1b76..dc5d070 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala
@@ -151,7 +151,7 @@ class Word2Vec extends Serializable with Logging {
   /** context words from [-window, window] */
   private var window = 5
 
-  private var trainWordsCount = 0
+  private var trainWordsCount = 0L
   private var vocabSize = 0
   @transient private var vocab: Array[VocabWord] = null
   @transient private var vocabHash = mutable.HashMap.empty[String, Int]
@@ -159,13 +159,13 @@ class Word2Vec extends Serializable with Logging {
   private def learnVocab(words: RDD[String]): Unit = {
 vocab = words.map(w => (w, 1))
   .reduceByKey(_ + _)
+  .filter(_._2 >= minCount)
   .map(x => VocabWord(
 x._1,
 x._2,
 new Array[Int](MAX_CODE_LENGTH),
 new Array[Int](MAX_CODE_LENGTH),
 0))
-  .filter(_.cn >= minCount)
   .collect()
   .sortWith((a, b) => a.cn > b.cn)
 
@@ -179,7 +179,7 @@ class Word2Vec extends Serializable with Logging {
   trainWordsCount += vocab(a).cn
   a += 1
 }
-logInfo("trainWordsCount = " + trainWordsCount)
+logInfo(s"vocabSize = $vocabSize, trainWordsCount = $trainWordsCount")
   }
 
   private def createExpTable(): Array[Float] = {
@@ -332,7 +332,7 @@ class Word2Vec extends Serializable with Logging {
 val random = new XORShiftRandom(seed ^ ((idx + 1) << 16) ^ ((-k - 1) 
<< 8))
 val syn0Modify = new Array[Int](vocabSize)
 val syn1Modify = new Array[Int](vocabSize)
-val model = iter.foldLeft((bcSyn0Global.value, bcSyn1Global.value, 0, 
0)) {
+val model = iter.foldLeft((bcSyn0Global.value, bcSyn1Global.value, 0L, 
0L)) {
   case ((syn0, syn1, lastWordCount, wordCount), sentence) =>
 var lwc = lastWordCount
 var wc = wordCount


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org



spark git commit: [SPARK-12603][MLLIB] PySpark MLlib GaussianMixtureModel should support single instance predict/predictSoft

2016-01-11 Thread jkbradley
Repository: spark
Updated Branches:
  refs/heads/master a767ee8a0 -> ee4ee02b8


[SPARK-12603][MLLIB] PySpark MLlib GaussianMixtureModel should support single 
instance predict/predictSoft

PySpark MLlib ```GaussianMixtureModel``` should support single instance 
```predict/predictSoft``` just like the Scala API does.
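
For context, a hedged Scala sketch of the single-instance API that the new Python
wrapper delegates to (`gmm` is assumed to be an already-trained model; this is not
code from the patch):

```scala
import org.apache.spark.mllib.linalg.Vectors
// Assumed: gmm is a trained org.apache.spark.mllib.clustering.GaussianMixtureModel.

val point = Vectors.dense(1.0, 2.0)
val cluster: Int = gmm.predict(point)             // index of the most likely component
val soft: Array[Double] = gmm.predictSoft(point)  // membership weights, one per component
```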

Author: Yanbo Liang 

Closes #10552 from yanboliang/spark-12603.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ee4ee02b
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ee4ee02b
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ee4ee02b

Branch: refs/heads/master
Commit: ee4ee02b86be8756a6d895a2e23e80862134a6d3
Parents: a767ee8
Author: Yanbo Liang 
Authored: Mon Jan 11 14:43:25 2016 -0800
Committer: Joseph K. Bradley 
Committed: Mon Jan 11 14:43:25 2016 -0800

--
 .../main/python/mllib/gaussian_mixture_model.py |  4 +++
 .../examples/mllib/DenseGaussianMixture.scala   |  6 
 .../python/GaussianMixtureModelWrapper.scala|  4 +++
 .../mllib/clustering/GaussianMixtureModel.scala |  2 +-
 python/pyspark/mllib/clustering.py  | 35 
 5 files changed, 37 insertions(+), 14 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/ee4ee02b/examples/src/main/python/mllib/gaussian_mixture_model.py
--
diff --git a/examples/src/main/python/mllib/gaussian_mixture_model.py 
b/examples/src/main/python/mllib/gaussian_mixture_model.py
index 2cb8010..69e836f 100644
--- a/examples/src/main/python/mllib/gaussian_mixture_model.py
+++ b/examples/src/main/python/mllib/gaussian_mixture_model.py
@@ -62,5 +62,9 @@ if __name__ == "__main__":
 for i in range(args.k):
 print(("weight = ", model.weights[i], "mu = ", model.gaussians[i].mu,
"sigma = ", model.gaussians[i].sigma.toArray()))
+print("\n")
+print(("The membership value of each vector to all mixture components 
(first 100): ",
+   model.predictSoft(data).take(100)))
+print("\n")
 print(("Cluster labels (first 100): ", model.predict(data).take(100)))
 sc.stop()

http://git-wip-us.apache.org/repos/asf/spark/blob/ee4ee02b/examples/src/main/scala/org/apache/spark/examples/mllib/DenseGaussianMixture.scala
--
diff --git 
a/examples/src/main/scala/org/apache/spark/examples/mllib/DenseGaussianMixture.scala
 
b/examples/src/main/scala/org/apache/spark/examples/mllib/DenseGaussianMixture.scala
index 1fce4ba..90b817b 100644
--- 
a/examples/src/main/scala/org/apache/spark/examples/mllib/DenseGaussianMixture.scala
+++ 
b/examples/src/main/scala/org/apache/spark/examples/mllib/DenseGaussianMixture.scala
@@ -58,6 +58,12 @@ object DenseGaussianMixture {
 (clusters.weights(i), clusters.gaussians(i).mu, 
clusters.gaussians(i).sigma))
 }
 
+println("The membership value of each vector to all mixture components 
(first <= 100):")
+val membership = clusters.predictSoft(data)
+membership.take(100).foreach { x =>
+  print(" " + x.mkString(","))
+}
+println()
 println("Cluster labels (first <= 100):")
 val clusterLabels = clusters.predict(data)
 clusterLabels.take(100).foreach { x =>

http://git-wip-us.apache.org/repos/asf/spark/blob/ee4ee02b/mllib/src/main/scala/org/apache/spark/mllib/api/python/GaussianMixtureModelWrapper.scala
--
diff --git 
a/mllib/src/main/scala/org/apache/spark/mllib/api/python/GaussianMixtureModelWrapper.scala
 
b/mllib/src/main/scala/org/apache/spark/mllib/api/python/GaussianMixtureModelWrapper.scala
index 6a3b20c..a689b09 100644
--- 
a/mllib/src/main/scala/org/apache/spark/mllib/api/python/GaussianMixtureModelWrapper.scala
+++ 
b/mllib/src/main/scala/org/apache/spark/mllib/api/python/GaussianMixtureModelWrapper.scala
@@ -40,5 +40,9 @@ private[python] class GaussianMixtureModelWrapper(model: 
GaussianMixtureModel) {
 SerDe.dumps(JavaConverters.seqAsJavaListConverter(modelGaussians).asJava)
   }
 
+  def predictSoft(point: Vector): Vector = {
+Vectors.dense(model.predictSoft(point))
+  }
+
   def save(sc: SparkContext, path: String): Unit = model.save(sc, path)
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/ee4ee02b/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureModel.scala
--
diff --git 
a/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureModel.scala
 
b/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureModel.scala
index 16bc45b..42fe270 100644
--- 

spark git commit: [SPARK-12734][HOTFIX][TEST-MAVEN] Fix bug in Netty exclusions

2016-01-11 Thread joshrosen
Repository: spark
Updated Branches:
  refs/heads/master 008a55828 -> f13c7f8f7


[SPARK-12734][HOTFIX][TEST-MAVEN] Fix bug in Netty exclusions

This is a hotfix for a build bug introduced by the Netty exclusion changes in 
#10672. We can't exclude `io.netty:netty` because Akka depends on it. There's 
not a direct conflict between `io.netty:netty` and `io.netty:netty-all`, 
because the former puts classes in the `org.jboss.netty` namespace while the 
latter uses the `io.netty` namespace. However, there still is a conflict 
between `org.jboss.netty:netty` and `io.netty:netty`, so we need to continue to 
exclude the JBoss version of that artifact.
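
Purely as an illustration of the kind of exclusion being kept (expressed in sbt syntax
rather than the Maven pom the patch actually edits, and with a hypothetical dependency):

```scala
// Keep io.netty:netty-all on the classpath, but exclude the conflicting
// org.jboss.netty:netty artifact pulled in transitively by a dependency.
libraryDependencies += ("org.apache.zookeeper" % "zookeeper" % "3.4.5")
  .exclude("org.jboss.netty", "netty")
```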

While the diff here looks somewhat large, note that this is only a revert of a 
some of the changes from #10672. You can see the net changes in pom.xml at 
https://github.com/apache/spark/compare/3119206b7188c23055621dfeaf6874f21c711a82...5211ab8#diff-600376dffeb79835ede4a0b285078036

Author: Josh Rosen 

Closes #10693 from JoshRosen/netty-hotfix.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f13c7f8f
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f13c7f8f
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f13c7f8f

Branch: refs/heads/master
Commit: f13c7f8f7dc8766b0a42406b5c3639d6be55cf33
Parents: 008a558
Author: Josh Rosen 
Authored: Mon Jan 11 00:31:29 2016 -0800
Committer: Josh Rosen 
Committed: Mon Jan 11 00:31:29 2016 -0800

--
 dev/deps/spark-deps-hadoop-2.2 |  1 +
 dev/deps/spark-deps-hadoop-2.3 |  1 +
 dev/deps/spark-deps-hadoop-2.4 |  1 +
 dev/deps/spark-deps-hadoop-2.6 |  1 +
 examples/pom.xml   |  4 ---
 pom.xml| 50 ++---
 6 files changed, 11 insertions(+), 47 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/f13c7f8f/dev/deps/spark-deps-hadoop-2.2
--
diff --git a/dev/deps/spark-deps-hadoop-2.2 b/dev/deps/spark-deps-hadoop-2.2
index 13d1b0e..e4373f7 100644
--- a/dev/deps/spark-deps-hadoop-2.2
+++ b/dev/deps/spark-deps-hadoop-2.2
@@ -142,6 +142,7 @@ metrics-graphite-3.1.2.jar
 metrics-json-3.1.2.jar
 metrics-jvm-3.1.2.jar
 minlog-1.2.jar
+netty-3.8.0.Final.jar
 netty-all-4.0.29.Final.jar
 objenesis-1.2.jar
 opencsv-2.3.jar

http://git-wip-us.apache.org/repos/asf/spark/blob/f13c7f8f/dev/deps/spark-deps-hadoop-2.3
--
diff --git a/dev/deps/spark-deps-hadoop-2.3 b/dev/deps/spark-deps-hadoop-2.3
index d7deaa0..7478181 100644
--- a/dev/deps/spark-deps-hadoop-2.3
+++ b/dev/deps/spark-deps-hadoop-2.3
@@ -133,6 +133,7 @@ metrics-json-3.1.2.jar
 metrics-jvm-3.1.2.jar
 minlog-1.2.jar
 mx4j-3.0.2.jar
+netty-3.8.0.Final.jar
 netty-all-4.0.29.Final.jar
 objenesis-1.2.jar
 opencsv-2.3.jar

http://git-wip-us.apache.org/repos/asf/spark/blob/f13c7f8f/dev/deps/spark-deps-hadoop-2.4
--
diff --git a/dev/deps/spark-deps-hadoop-2.4 b/dev/deps/spark-deps-hadoop-2.4
index 7ad2212..faffb8b 100644
--- a/dev/deps/spark-deps-hadoop-2.4
+++ b/dev/deps/spark-deps-hadoop-2.4
@@ -134,6 +134,7 @@ metrics-json-3.1.2.jar
 metrics-jvm-3.1.2.jar
 minlog-1.2.jar
 mx4j-3.0.2.jar
+netty-3.8.0.Final.jar
 netty-all-4.0.29.Final.jar
 objenesis-1.2.jar
 opencsv-2.3.jar

http://git-wip-us.apache.org/repos/asf/spark/blob/f13c7f8f/dev/deps/spark-deps-hadoop-2.6
--
diff --git a/dev/deps/spark-deps-hadoop-2.6 b/dev/deps/spark-deps-hadoop-2.6
index 7f85189..e703c7a 100644
--- a/dev/deps/spark-deps-hadoop-2.6
+++ b/dev/deps/spark-deps-hadoop-2.6
@@ -140,6 +140,7 @@ metrics-json-3.1.2.jar
 metrics-jvm-3.1.2.jar
 minlog-1.2.jar
 mx4j-3.0.2.jar
+netty-3.8.0.Final.jar
 netty-all-4.0.29.Final.jar
 objenesis-1.2.jar
 opencsv-2.3.jar

http://git-wip-us.apache.org/repos/asf/spark/blob/f13c7f8f/examples/pom.xml
--
diff --git a/examples/pom.xml b/examples/pom.xml
index 6013085..1a0d5e5 100644
--- a/examples/pom.xml
+++ b/examples/pom.xml
@@ -111,10 +111,6 @@
   org.jruby
   jruby-complete
 
-
-  io.netty
-  netty
-
   
 
 

http://git-wip-us.apache.org/repos/asf/spark/blob/f13c7f8f/pom.xml
--
diff --git a/pom.xml b/pom.xml
index cbed36c..06cccf1 100644
--- a/pom.xml
+++ b/pom.xml
@@ -519,12 +519,6 @@
 ${akka.group}
 akka-remote_${scala.binary.version}
 ${akka.version}
-
-  
-io.netty
-netty
-  

spark git commit: [SPARK-12539][FOLLOW-UP] always sort in partitioning writer

2016-01-11 Thread rxin
Repository: spark
Updated Branches:
  refs/heads/master f13c7f8f7 -> f253feff6


[SPARK-12539][FOLLOW-UP] always sort in partitioning writer

address comments in #10498 , especially 
https://github.com/apache/spark/pull/10498#discussion_r49021259

Author: Wenchen Fan 

This patch had conflicts when merged, resolved by
Committer: Reynold Xin 

Closes #10638 from cloud-fan/bucket-write.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f253feff
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f253feff
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f253feff

Branch: refs/heads/master
Commit: f253feff62f3eb3cce22bbec0874f317a61b0092
Parents: f13c7f8
Author: Wenchen Fan 
Authored: Mon Jan 11 00:44:33 2016 -0800
Committer: Reynold Xin 
Committed: Mon Jan 11 00:44:33 2016 -0800

--
 .../execution/datasources/WriterContainer.scala | 192 +--
 .../apache/spark/sql/sources/interfaces.scala   |   3 -
 2 files changed, 48 insertions(+), 147 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/f253feff/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/WriterContainer.scala
--
diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/WriterContainer.scala
 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/WriterContainer.scala
index 40ecdb8..fff7287 100644
--- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/WriterContainer.scala
+++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/WriterContainer.scala
@@ -33,7 +33,7 @@ import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.execution.UnsafeKVExternalSorter
 import org.apache.spark.sql.sources.{HadoopFsRelation, OutputWriter, 
OutputWriterFactory}
-import org.apache.spark.sql.types.{IntegerType, StringType, StructType}
+import org.apache.spark.sql.types.{IntegerType, StringType, StructField, 
StructType}
 import org.apache.spark.util.SerializableConfiguration
 
 
@@ -349,67 +349,6 @@ private[sql] class DynamicPartitionWriterContainer(
 }
   }
 
-  private def sameBucket(key1: UnsafeRow, key2: UnsafeRow): Boolean = {
-val bucketIdIndex = partitionColumns.length
-if (key1.getInt(bucketIdIndex) != key2.getInt(bucketIdIndex)) {
-  false
-} else {
-  var i = partitionColumns.length - 1
-  while (i >= 0) {
-val dt = partitionColumns(i).dataType
-if (key1.get(i, dt) != key2.get(i, dt)) return false
-i -= 1
-  }
-  true
-}
-  }
-
-  private def sortBasedWrite(
-  sorter: UnsafeKVExternalSorter,
-  iterator: Iterator[InternalRow],
-  getSortingKey: UnsafeProjection,
-  getOutputRow: UnsafeProjection,
-  getPartitionString: UnsafeProjection,
-  outputWriters: java.util.HashMap[InternalRow, OutputWriter]): Unit = {
-while (iterator.hasNext) {
-  val currentRow = iterator.next()
-  sorter.insertKV(getSortingKey(currentRow), getOutputRow(currentRow))
-}
-
-logInfo(s"Sorting complete. Writing out partition files one at a time.")
-
-val needNewWriter: (UnsafeRow, UnsafeRow) => Boolean = if 
(sortColumns.isEmpty) {
-  (key1, key2) => key1 != key2
-} else {
-  (key1, key2) => key1 == null || !sameBucket(key1, key2)
-}
-
-val sortedIterator = sorter.sortedIterator()
-var currentKey: UnsafeRow = null
-var currentWriter: OutputWriter = null
-try {
-  while (sortedIterator.next()) {
-if (needNewWriter(currentKey, sortedIterator.getKey)) {
-  if (currentWriter != null) {
-currentWriter.close()
-  }
-  currentKey = sortedIterator.getKey.copy()
-  logDebug(s"Writing partition: $currentKey")
-
-  // Either use an existing file from before, or open a new one.
-  currentWriter = outputWriters.remove(currentKey)
-  if (currentWriter == null) {
-currentWriter = newOutputWriter(currentKey, getPartitionString)
-  }
-}
-
-currentWriter.writeInternal(sortedIterator.getValue)
-  }
-} finally {
-  if (currentWriter != null) { currentWriter.close() }
-}
-  }
-
   /**
* Open and returns a new OutputWriter given a partition key and optional 
bucket id.
* If bucket id is specified, we will append it to the end of the file name, 
but before the
@@ -435,22 +374,18 @@ private[sql] class DynamicPartitionWriterContainer(
   }
 
   def writeRows(taskContext: TaskContext, iterator: Iterator[InternalRow]): 
Unit = {
-val outputWriters = new 

spark git commit: [SPARK-12269][STREAMING][KINESIS] Update aws-java-sdk version

2016-01-11 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/master bd723bd53 -> 8fe928b4f


[SPARK-12269][STREAMING][KINESIS] Update aws-java-sdk version

The current Spark Streaming kinesis connector references a quite old version 
1.9.40 of the AWS Java SDK (1.10.40 is current). Numerous AWS features 
including Kinesis Firehose are unavailable in 1.9. Those two versions of the 
AWS SDK in turn require conflicting versions of Jackson (2.4.4 and 2.5.3 
respectively) such that one cannot include the current AWS SDK in a project 
that also uses the Spark Streaming Kinesis ASL.

Author: BrianLondon 

Closes #10256 from BrianLondon/master.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/8fe928b4
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/8fe928b4
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/8fe928b4

Branch: refs/heads/master
Commit: 8fe928b4fe380ba527164bd413402abfed13c0e1
Parents: bd723bd
Author: BrianLondon 
Authored: Mon Jan 11 09:32:06 2016 +
Committer: Sean Owen 
Committed: Mon Jan 11 09:32:06 2016 +

--
 dev/deps/spark-deps-hadoop-2.2 | 8 
 dev/deps/spark-deps-hadoop-2.3 | 8 
 dev/deps/spark-deps-hadoop-2.4 | 8 
 dev/deps/spark-deps-hadoop-2.6 | 8 
 pom.xml| 6 +++---
 5 files changed, 19 insertions(+), 19 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/8fe928b4/dev/deps/spark-deps-hadoop-2.2
--
diff --git a/dev/deps/spark-deps-hadoop-2.2 b/dev/deps/spark-deps-hadoop-2.2
index e4373f7..cd3ff29 100644
--- a/dev/deps/spark-deps-hadoop-2.2
+++ b/dev/deps/spark-deps-hadoop-2.2
@@ -84,13 +84,13 @@ hadoop-yarn-server-web-proxy-2.2.0.jar
 httpclient-4.3.2.jar
 httpcore-4.3.2.jar
 ivy-2.4.0.jar
-jackson-annotations-2.4.4.jar
-jackson-core-2.4.4.jar
+jackson-annotations-2.5.3.jar
+jackson-core-2.5.3.jar
 jackson-core-asl-1.9.13.jar
-jackson-databind-2.4.4.jar
+jackson-databind-2.5.3.jar
 jackson-jaxrs-1.9.13.jar
 jackson-mapper-asl-1.9.13.jar
-jackson-module-scala_2.10-2.4.4.jar
+jackson-module-scala_2.10-2.5.3.jar
 jackson-xc-1.9.13.jar
 janino-2.7.8.jar
 jansi-1.4.jar

http://git-wip-us.apache.org/repos/asf/spark/blob/8fe928b4/dev/deps/spark-deps-hadoop-2.3
--
diff --git a/dev/deps/spark-deps-hadoop-2.3 b/dev/deps/spark-deps-hadoop-2.3
index 7478181..0985089 100644
--- a/dev/deps/spark-deps-hadoop-2.3
+++ b/dev/deps/spark-deps-hadoop-2.3
@@ -79,13 +79,13 @@ hadoop-yarn-server-web-proxy-2.3.0.jar
 httpclient-4.3.2.jar
 httpcore-4.3.2.jar
 ivy-2.4.0.jar
-jackson-annotations-2.4.4.jar
-jackson-core-2.4.4.jar
+jackson-annotations-2.5.3.jar
+jackson-core-2.5.3.jar
 jackson-core-asl-1.9.13.jar
-jackson-databind-2.4.4.jar
+jackson-databind-2.5.3.jar
 jackson-jaxrs-1.9.13.jar
 jackson-mapper-asl-1.9.13.jar
-jackson-module-scala_2.10-2.4.4.jar
+jackson-module-scala_2.10-2.5.3.jar
 jackson-xc-1.9.13.jar
 janino-2.7.8.jar
 jansi-1.4.jar

http://git-wip-us.apache.org/repos/asf/spark/blob/8fe928b4/dev/deps/spark-deps-hadoop-2.4
--
diff --git a/dev/deps/spark-deps-hadoop-2.4 b/dev/deps/spark-deps-hadoop-2.4
index faffb8b..50f0626 100644
--- a/dev/deps/spark-deps-hadoop-2.4
+++ b/dev/deps/spark-deps-hadoop-2.4
@@ -79,13 +79,13 @@ hadoop-yarn-server-web-proxy-2.4.0.jar
 httpclient-4.3.2.jar
 httpcore-4.3.2.jar
 ivy-2.4.0.jar
-jackson-annotations-2.4.4.jar
-jackson-core-2.4.4.jar
+jackson-annotations-2.5.3.jar
+jackson-core-2.5.3.jar
 jackson-core-asl-1.9.13.jar
-jackson-databind-2.4.4.jar
+jackson-databind-2.5.3.jar
 jackson-jaxrs-1.9.13.jar
 jackson-mapper-asl-1.9.13.jar
-jackson-module-scala_2.10-2.4.4.jar
+jackson-module-scala_2.10-2.5.3.jar
 jackson-xc-1.9.13.jar
 janino-2.7.8.jar
 jansi-1.4.jar

http://git-wip-us.apache.org/repos/asf/spark/blob/8fe928b4/dev/deps/spark-deps-hadoop-2.6
--
diff --git a/dev/deps/spark-deps-hadoop-2.6 b/dev/deps/spark-deps-hadoop-2.6
index e703c7a..2b6ca98 100644
--- a/dev/deps/spark-deps-hadoop-2.6
+++ b/dev/deps/spark-deps-hadoop-2.6
@@ -85,13 +85,13 @@ htrace-core-3.0.4.jar
 httpclient-4.3.2.jar
 httpcore-4.3.2.jar
 ivy-2.4.0.jar
-jackson-annotations-2.4.4.jar
-jackson-core-2.4.4.jar
+jackson-annotations-2.5.3.jar
+jackson-core-2.5.3.jar
 jackson-core-asl-1.9.13.jar
-jackson-databind-2.4.4.jar
+jackson-databind-2.5.3.jar
 jackson-jaxrs-1.9.13.jar
 jackson-mapper-asl-1.9.13.jar
-jackson-module-scala_2.10-2.4.4.jar
+jackson-module-scala_2.10-2.5.3.jar
 jackson-xc-1.9.13.jar
 janino-2.7.8.jar
 jansi-1.4.jar


spark git commit: removed lambda from sortByKey()

2016-01-11 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/branch-1.6 43b72d83e -> d4cfd2acd


removed lambda from sortByKey()

According to the documentation the sortByKey method does not take a lambda as 
an argument, thus the example is flawed. Removed the argument completely as 
this will default to ascending sort.

Author: Udo Klein 

Closes #10640 from udoklein/patch-1.

(cherry picked from commit bd723bd53d9a28239b60939a248a4ea13340aad8)
Signed-off-by: Sean Owen 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d4cfd2ac
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d4cfd2ac
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d4cfd2ac

Branch: refs/heads/branch-1.6
Commit: d4cfd2acd62f2b0638a1248a38263c04eaf8
Parents: 43b72d8
Author: Udo Klein 
Authored: Mon Jan 11 09:30:08 2016 +
Committer: Sean Owen 
Committed: Mon Jan 11 09:30:27 2016 +

--
 examples/src/main/python/sort.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/d4cfd2ac/examples/src/main/python/sort.py
--
diff --git a/examples/src/main/python/sort.py b/examples/src/main/python/sort.py
index f6b0ecb..b6c2916 100755
--- a/examples/src/main/python/sort.py
+++ b/examples/src/main/python/sort.py
@@ -30,7 +30,7 @@ if __name__ == "__main__":
 lines = sc.textFile(sys.argv[1], 1)
 sortedCount = lines.flatMap(lambda x: x.split(' ')) \
 .map(lambda x: (int(x), 1)) \
-.sortByKey(lambda x: x)
+.sortByKey()
 # This is just a demo on how to bring all the sorted data back to a single 
node.
 # In reality, we wouldn't want to collect all the data to the driver node.
 output = sortedCount.collect()


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org



spark git commit: [SPARK-12734][BUILD] Backport Netty exclusion + Maven enforcer fixes to branch-1.5

2016-01-11 Thread joshrosen
Repository: spark
Updated Branches:
  refs/heads/branch-1.5 de7194a0d -> 665aa47f8


[SPARK-12734][BUILD] Backport Netty exclusion + Maven enforcer fixes to 
branch-1.5

This patch backports the Netty exclusion fixes from #10672 to branch-1.5.

Author: Josh Rosen 

Closes #10690 from JoshRosen/netty-exclude-15-backport.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/665aa47f
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/665aa47f
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/665aa47f

Branch: refs/heads/branch-1.5
Commit: 665aa47f840ef392ccf3dcd23e7fd93987f7769a
Parents: de7194a
Author: Josh Rosen 
Authored: Mon Jan 11 00:51:44 2016 -0800
Committer: Josh Rosen 
Committed: Mon Jan 11 00:51:44 2016 -0800

--
 dev/test-dependencies.sh | 16 
 pom.xml  | 21 -
 2 files changed, 24 insertions(+), 13 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/665aa47f/dev/test-dependencies.sh
--
diff --git a/dev/test-dependencies.sh b/dev/test-dependencies.sh
index 129741f..47ae15b 100755
--- a/dev/test-dependencies.sh
+++ b/dev/test-dependencies.sh
@@ -76,18 +76,10 @@ for HADOOP_PROFILE in "${HADOOP_PROFILES[@]}"; do
 HADOOP_MODULE_PROFILES="-Phive-thriftserver -Pyarn -Phive"
   fi
   echo "Performing Maven install for $HADOOP_PROFILE"
-  $MVN $HADOOP_MODULE_PROFILES -P$HADOOP_PROFILE jar:jar install:install -q \
--pl '!assembly' \
--pl '!examples' \
--pl '!external/flume-assembly' \
--pl '!external/kafka-assembly' \
--pl '!external/twitter' \
--pl '!external/flume' \
--pl '!external/mqtt' \
--pl '!external/mqtt-assembly' \
--pl '!external/zeromq' \
--pl '!external/kafka' \
--DskipTests
+  $MVN $HADOOP_MODULE_PROFILES -P$HADOOP_PROFILE jar:jar jar:test-jar 
install:install -q
+
+  echo "Performing Maven validate for $HADOOP_PROFILE"
+  $MVN $HADOOP_MODULE_PROFILES -P$HADOOP_PROFILE validate -q
 
   echo "Generating dependency manifest for $HADOOP_PROFILE"
   mkdir -p dev/pr-deps

http://git-wip-us.apache.org/repos/asf/spark/blob/665aa47f/pom.xml
--
diff --git a/pom.xml b/pom.xml
index c751298..ef00c70 100644
--- a/pom.xml
+++ b/pom.xml
@@ -1058,6 +1058,12 @@
 zookeeper
 ${zookeeper.version}
 ${hadoop.deps.scope}
+
+  
+org.jboss.netty
+netty
+  
+
   
   
 org.codehaus.jackson
@@ -1774,7 +1780,7 @@
 
   org.apache.maven.plugins
   maven-enforcer-plugin
-  1.4
+  1.4.1
   
 
   enforce-versions
@@ -1789,6 +1795,19 @@
   
 ${java.version}
   
+  
+
+  
+  org.jboss.netty
+
+true
+  
 
   
 


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org



spark git commit: removed lambda from sortByKey()

2016-01-11 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/branch-1.5 665aa47f8 -> 0e2aa4198


removed lambda from sortByKey()

According to the documentation the sortByKey method does not take a lambda as 
an argument, thus the example is flawed. Removed the argument completely as 
this will default to ascending sort.

Author: Udo Klein 

Closes #10640 from udoklein/patch-1.

(cherry picked from commit bd723bd53d9a28239b60939a248a4ea13340aad8)
Signed-off-by: Sean Owen 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0e2aa419
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0e2aa419
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0e2aa419

Branch: refs/heads/branch-1.5
Commit: 0e2aa41988c4ae8391b48b0902badf0cda188dcc
Parents: 665aa47
Author: Udo Klein 
Authored: Mon Jan 11 09:30:08 2016 +
Committer: Sean Owen 
Committed: Mon Jan 11 09:30:40 2016 +

--
 examples/src/main/python/sort.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/0e2aa419/examples/src/main/python/sort.py
--
diff --git a/examples/src/main/python/sort.py b/examples/src/main/python/sort.py
index f6b0ecb..b6c2916 100755
--- a/examples/src/main/python/sort.py
+++ b/examples/src/main/python/sort.py
@@ -30,7 +30,7 @@ if __name__ == "__main__":
 lines = sc.textFile(sys.argv[1], 1)
 sortedCount = lines.flatMap(lambda x: x.split(' ')) \
 .map(lambda x: (int(x), 1)) \
-.sortByKey(lambda x: x)
+.sortByKey()
 # This is just a demo on how to bring all the sorted data back to a single 
node.
 # In reality, we wouldn't want to collect all the data to the driver node.
 output = sortedCount.collect()


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org



spark git commit: removed lambda from sortByKey()

2016-01-11 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/master f253feff6 -> bd723bd53


removed lambda from sortByKey()

According to the documentation the sortByKey method does not take a lambda as 
an argument, thus the example is flawed. Removed the argument completely as 
this will default to ascending sort.

Author: Udo Klein 

Closes #10640 from udoklein/patch-1.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/bd723bd5
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/bd723bd5
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/bd723bd5

Branch: refs/heads/master
Commit: bd723bd53d9a28239b60939a248a4ea13340aad8
Parents: f253fef
Author: Udo Klein 
Authored: Mon Jan 11 09:30:08 2016 +
Committer: Sean Owen 
Committed: Mon Jan 11 09:30:08 2016 +

--
 examples/src/main/python/sort.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/bd723bd5/examples/src/main/python/sort.py
--
diff --git a/examples/src/main/python/sort.py b/examples/src/main/python/sort.py
index f6b0ecb..b6c2916 100755
--- a/examples/src/main/python/sort.py
+++ b/examples/src/main/python/sort.py
@@ -30,7 +30,7 @@ if __name__ == "__main__":
 lines = sc.textFile(sys.argv[1], 1)
 sortedCount = lines.flatMap(lambda x: x.split(' ')) \
 .map(lambda x: (int(x), 1)) \
-.sortByKey(lambda x: x)
+.sortByKey()
 # This is just a demo on how to bring all the sorted data back to a single 
node.
 # In reality, we wouldn't want to collect all the data to the driver node.
 output = sortedCount.collect()


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org



spark git commit: [SPARK-12742][SQL] org.apache.spark.sql.hive.LogicalPlanToSQLSuite failure due to Table already exists exception

2016-01-11 Thread lian
Repository: spark
Updated Branches:
  refs/heads/master fe9eb0b0c -> 473907adf


[SPARK-12742][SQL] org.apache.spark.sql.hive.LogicalPlanToSQLSuite failure due 
to Table already exists exception

```
[info] Exception encountered when attempting to run a suite with class name:
org.apache.spark.sql.hive.LogicalPlanToSQLSuite *** ABORTED *** (325 
milliseconds)
[info]   org.apache.spark.sql.AnalysisException: Table `t1` already exists.;
[info]   at 
org.apache.spark.sql.DataFrameWriter.saveAsTable(DataFrameWriter.scala:296)
[info]   at 
org.apache.spark.sql.DataFrameWriter.saveAsTable(DataFrameWriter.scala:285)
[info]   at 
org.apache.spark.sql.hive.LogicalPlanToSQLSuite.beforeAll(LogicalPlanToSQLSuite.scala:33)
[info]   at 
org.scalatest.BeforeAndAfterAll$class.beforeAll(BeforeAndAfterAll.scala:187)
[info]   at 
org.apache.spark.sql.hive.LogicalPlanToSQLSuite.beforeAll(LogicalPlanToSQLSuite.scala:23)
[info]   at 
org.scalatest.BeforeAndAfterAll$class.run(BeforeAndAfterAll.scala:253)
[info]   at 
org.apache.spark.sql.hive.LogicalPlanToSQLSuite.run(LogicalPlanToSQLSuite.scala:23)
[info]   at 
org.scalatest.tools.Framework.org$scalatest$tools$Framework$$runSuite(Framework.scala:462)
[info]   at 
org.scalatest.tools.Framework$ScalaTestTask.execute(Framework.scala:671)
[info]   at sbt.ForkMain$Run$2.call(ForkMain.java:296)
[info]   at sbt.ForkMain$Run$2.call(ForkMain.java:286)
[info]   at java.util.concurrent.FutureTask.run(FutureTask.java:266)
[info]   at 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
[info]   at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
[info]   at java.lang.Thread.run(Thread.java:745)
```

/cc liancheng

Author: wangfei 

Closes #10682 from scwf/fix-test.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/473907ad
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/473907ad
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/473907ad

Branch: refs/heads/master
Commit: 473907adf6e37855ee31d0703b43d7170e26b4b9
Parents: fe9eb0b
Author: wangfei 
Authored: Mon Jan 11 18:18:44 2016 -0800
Committer: Cheng Lian 
Committed: Mon Jan 11 18:18:44 2016 -0800

--
 .../scala/org/apache/spark/sql/hive/LogicalPlanToSQLSuite.scala   | 3 +++
 1 file changed, 3 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/473907ad/sql/hive/src/test/scala/org/apache/spark/sql/hive/LogicalPlanToSQLSuite.scala
--
diff --git 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/LogicalPlanToSQLSuite.scala 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/LogicalPlanToSQLSuite.scala
index 9a8a9c5..2ee8150 100644
--- 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/LogicalPlanToSQLSuite.scala
+++ 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/LogicalPlanToSQLSuite.scala
@@ -24,6 +24,9 @@ class LogicalPlanToSQLSuite extends SQLBuilderTest with 
SQLTestUtils {
   import testImplicits._
 
   protected override def beforeAll(): Unit = {
+sql("DROP TABLE IF EXISTS t0")
+sql("DROP TABLE IF EXISTS t1")
+sql("DROP TABLE IF EXISTS t2")
 sqlContext.range(10).write.saveAsTable("t0")
 
 sqlContext


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org



spark git commit: [SPARK-11823] Ignores HiveThriftBinaryServerSuite's test jdbc cancel

2016-01-11 Thread joshrosen
Repository: spark
Updated Branches:
  refs/heads/branch-1.6 dd2cf64f3 -> a6c9c68d8


[SPARK-11823] Ignores HiveThriftBinaryServerSuite's test jdbc cancel

https://issues.apache.org/jira/browse/SPARK-11823

This test often hangs and times out, leaving hanging processes. Let's ignore it 
for now and improve the test.

Author: Yin Huai 

Closes #10715 from yhuai/SPARK-11823-ignore.

(cherry picked from commit aaa2c3b628319178ca1f3f68966ff253c2de49cb)
Signed-off-by: Josh Rosen 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a6c9c68d
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a6c9c68d
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a6c9c68d

Branch: refs/heads/branch-1.6
Commit: a6c9c68d8855e3a8bfc92f26b3877b92367087a4
Parents: dd2cf64
Author: Yin Huai 
Authored: Mon Jan 11 19:59:15 2016 -0800
Committer: Josh Rosen 
Committed: Mon Jan 11 19:59:37 2016 -0800

--
 .../spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala| 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/a6c9c68d/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala
--
diff --git 
a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala
 
b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala
index ebb2575..1bd6ec9 100644
--- 
a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala
+++ 
b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala
@@ -347,7 +347,9 @@ class HiveThriftBinaryServerSuite extends 
HiveThriftJdbcTest {
 )
   }
 
-  test("test jdbc cancel") {
+  // This test often hangs and then times out, leaving the hanging processes.
+  // Let's ignore it and improve the test.
+  ignore("test jdbc cancel") {
 withJdbcStatement { statement =>
   val queries = Seq(
 "DROP TABLE IF EXISTS test_map",


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org



spark git commit: [SPARK-12498][SQL][MINOR] BooleanSimplication simplification

2016-01-11 Thread rxin
Repository: spark
Updated Branches:
  refs/heads/master 473907adf -> 36d493509


[SPARK-12498][SQL][MINOR] BooleanSimplication simplification

Scala syntax allows binary case classes to be used as infix operators in pattern 
matching. This PR makes use of this syntax sugar to make `BooleanSimplification` 
more readable.
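
To see the sugar in question, a small standalone sketch (deliberately unrelated to
Catalyst's actual classes) of matching a binary case class with infix notation:

```scala
// Any case class with exactly two parameters can be matched infix-style.
sealed trait Expr
case class Lit(value: Boolean) extends Expr
case class And(left: Expr, right: Expr) extends Expr

def simplify(e: Expr): Expr = e match {
  case Lit(true) And r => r          // same as: case And(Lit(true), r) => r
  case l And Lit(true) => l
  case other           => other
}
```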

Author: Cheng Lian 

Closes #10445 from liancheng/boolean-simplification-simplification.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/36d49350
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/36d49350
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/36d49350

Branch: refs/heads/master
Commit: 36d493509d32d14b54af62f5f65e8fa750e7413d
Parents: 473907a
Author: Cheng Lian 
Authored: Mon Jan 11 18:42:26 2016 -0800
Committer: Reynold Xin 
Committed: Mon Jan 11 18:42:26 2016 -0800

--
 .../sql/catalyst/expressions/literals.scala |   4 +
 .../sql/catalyst/optimizer/Optimizer.scala  | 190 +--
 2 files changed, 92 insertions(+), 102 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/36d49350/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala
--
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala
index 17351ef..e0b0203 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/literals.scala
@@ -28,6 +28,10 @@ import org.apache.spark.sql.types._
 import org.apache.spark.unsafe.types._
 
 object Literal {
+  val TrueLiteral: Literal = Literal(true, BooleanType)
+
+  val FalseLiteral: Literal = Literal(false, BooleanType)
+
   def apply(v: Any): Literal = v match {
 case i: Int => Literal(i, IntegerType)
 case l: Long => Literal(l, LongType)

http://git-wip-us.apache.org/repos/asf/spark/blob/36d49350/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
--
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
index f8121a7..b70bc18 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
@@ -21,6 +21,7 @@ import scala.collection.immutable.HashSet
 
 import org.apache.spark.sql.catalyst.analysis.{CleanupAliases, 
EliminateSubQueries}
 import org.apache.spark.sql.catalyst.expressions._
+import org.apache.spark.sql.catalyst.expressions.Literal.{FalseLiteral, 
TrueLiteral}
 import org.apache.spark.sql.catalyst.expressions.aggregate._
 import org.apache.spark.sql.catalyst.planning.ExtractFiltersAndInnerJoins
 import org.apache.spark.sql.catalyst.plans.{FullOuter, Inner, LeftOuter, 
LeftSemi, RightOuter}
@@ -519,112 +520,97 @@ object OptimizeIn extends Rule[LogicalPlan] {
 object BooleanSimplification extends Rule[LogicalPlan] with PredicateHelper {
   def apply(plan: LogicalPlan): LogicalPlan = plan transform {
 case q: LogicalPlan => q transformExpressionsUp {
-  case and @ And(left, right) => (left, right) match {
-// true && r  =>  r
-case (Literal(true, BooleanType), r) => r
-// l && true  =>  l
-case (l, Literal(true, BooleanType)) => l
-// false && r  =>  false
-case (Literal(false, BooleanType), _) => Literal(false)
-// l && false  =>  false
-case (_, Literal(false, BooleanType)) => Literal(false)
-// a && a  =>  a
-case (l, r) if l fastEquals r => l
-// a && (not(a) || b) => a && b
-case (l, Or(l1, r)) if (Not(l) == l1) => And(l, r)
-case (l, Or(r, l1)) if (Not(l) == l1) => And(l, r)
-case (Or(l, l1), r) if (l1 == Not(r)) => And(l, r)
-case (Or(l1, l), r) if (l1 == Not(r)) => And(l, r)
-// (a || b) && (a || c)  =>  a || (b && c)
-case _ =>
-  // 1. Split left and right to get the disjunctive predicates,
-  //   i.e. lhs = (a, b), rhs = (a, c)
-  // 2. Find the common predict between lhsSet and rhsSet, i.e. common 
= (a)
-  // 3. Remove common predict from lhsSet and rhsSet, i.e. ldiff = 
(b), rdiff = (c)
-  // 4. Apply the formula, get the optimized predicate: common || 
(ldiff && rdiff)
-  val lhs = splitDisjunctivePredicates(left)
-  val rhs = 

spark git commit: [SPARK-12692][BUILD][STREAMING] Scala style: Fix the style violation (Space before ", " or ":")

2016-01-11 Thread rxin
Repository: spark
Updated Branches:
  refs/heads/master aaa2c3b62 -> 39ae04e6b


[SPARK-12692][BUILD][STREAMING] Scala style: Fix the style violation (Space 
before "," or ":")

Fix the style violation (space before , and :).
This PR is a followup for #10643.

Author: Kousuke Saruta 

Closes #10685 from sarutak/SPARK-12692-followup-streaming.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/39ae04e6
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/39ae04e6
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/39ae04e6

Branch: refs/heads/master
Commit: 39ae04e6b714e085a1341aa84d8fc5fc827d5f35
Parents: aaa2c3b
Author: Kousuke Saruta 
Authored: Mon Jan 11 21:06:22 2016 -0800
Committer: Reynold Xin 
Committed: Mon Jan 11 21:06:22 2016 -0800

--
 .../clickstream/PageViewGenerator.scala | 14 
 .../spark/streaming/flume/sink/Logging.scala|  8 ++---
 .../streaming/flume/FlumeInputDStream.scala | 18 +-
 .../kafka/DirectKafkaInputDStream.scala |  4 +--
 .../streaming/kafka/KafkaInputDStream.scala |  4 +--
 .../kafka/ReliableKafkaStreamSuite.scala|  2 +-
 .../spark/streaming/mqtt/MQTTInputDStream.scala |  4 +--
 .../streaming/twitter/TwitterInputDStream.scala |  4 +--
 project/MimaExcludes.scala  | 12 +++
 .../org/apache/spark/streaming/Checkpoint.scala | 12 +++
 .../spark/streaming/StreamingContext.scala  | 36 ++--
 .../streaming/api/java/JavaDStreamLike.scala|  2 +-
 .../dstream/ConstantInputDStream.scala  |  4 +--
 .../dstream/DStreamCheckpointData.scala |  2 +-
 .../streaming/dstream/FileInputDStream.scala| 18 +-
 .../spark/streaming/dstream/InputDStream.scala  |  6 ++--
 .../dstream/PluggableInputDStream.scala |  4 +--
 .../streaming/dstream/RawInputDStream.scala |  4 +--
 .../dstream/ReceiverInputDStream.scala  |  6 ++--
 .../streaming/dstream/SocketInputDStream.scala  |  4 +--
 .../spark/streaming/dstream/StateDStream.scala  |  6 ++--
 .../spark/streaming/receiver/Receiver.scala |  8 ++---
 .../spark/streaming/BasicOperationsSuite.scala  |  2 +-
 .../spark/streaming/CheckpointSuite.scala   |  2 +-
 .../spark/streaming/MasterFailureTest.scala |  4 +--
 .../apache/spark/streaming/StateMapSuite.scala  |  2 +-
 .../spark/streaming/StreamingContextSuite.scala |  2 +-
 .../apache/spark/streaming/TestSuiteBase.scala  |  4 +--
 .../scheduler/ReceiverTrackerSuite.scala|  4 +--
 .../streaming/util/WriteAheadLogSuite.scala |  2 +-
 30 files changed, 108 insertions(+), 96 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/39ae04e6/examples/src/main/scala/org/apache/spark/examples/streaming/clickstream/PageViewGenerator.scala
--
diff --git 
a/examples/src/main/scala/org/apache/spark/examples/streaming/clickstream/PageViewGenerator.scala
 
b/examples/src/main/scala/org/apache/spark/examples/streaming/clickstream/PageViewGenerator.scala
index ce1a620..50216b9 100644
--- 
a/examples/src/main/scala/org/apache/spark/examples/streaming/clickstream/PageViewGenerator.scala
+++ 
b/examples/src/main/scala/org/apache/spark/examples/streaming/clickstream/PageViewGenerator.scala
@@ -23,15 +23,15 @@ import java.net.ServerSocket
 import java.util.Random
 
 /** Represents a page view on a website with associated dimension data. */
-class PageView(val url : String, val status : Int, val zipCode : Int, val 
userID : Int)
+class PageView(val url: String, val status: Int, val zipCode: Int, val userID: 
Int)
 extends Serializable {
-  override def toString() : String = {
+  override def toString(): String = {
 "%s\t%s\t%s\t%s\n".format(url, status, zipCode, userID)
   }
 }
 
 object PageView extends Serializable {
-  def fromString(in : String) : PageView = {
+  def fromString(in: String): PageView = {
 val parts = in.split("\t")
 new PageView(parts(0), parts(1).toInt, parts(2).toInt, parts(3).toInt)
   }
@@ -58,9 +58,9 @@ object PageViewGenerator {
404 -> .05)
   val userZipCode = Map(94709 -> .5,
 94117 -> .5)
-  val userID = Map((1 to 100).map(_ -> .01) : _*)
+  val userID = Map((1 to 100).map(_ -> .01): _*)
 
-  def pickFromDistribution[T](inputMap : Map[T, Double]) : T = {
+  def pickFromDistribution[T](inputMap: Map[T, Double]): T = {
 val rand = new Random().nextDouble()
 var total = 0.0
 for ((item, prob) <- inputMap) {
@@ -72,7 +72,7 @@ object PageViewGenerator {
 inputMap.take(1).head._1 // Shouldn't get here if probabilities add up to 
1.0
   }
 
-  def getNextClickEvent() : String = {
+  def 

spark git commit: [SPARK-12692][BUILD][YARN] Scala style: Fix the style violation (Space before ", " or ":")

2016-01-11 Thread rxin
Repository: spark
Updated Branches:
  refs/heads/master 39ae04e6b -> 112abf910


[SPARK-12692][BUILD][YARN] Scala style: Fix the style violation (Space before 
"," or ":")

Fix the style violation (space before , and :).
This PR is a followup for #10643.

Author: Kousuke Saruta 

Closes #10686 from sarutak/SPARK-12692-followup-yarn.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/112abf91
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/112abf91
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/112abf91

Branch: refs/heads/master
Commit: 112abf9100f05be436e449817468c50174712c78
Parents: 39ae04e
Author: Kousuke Saruta 
Authored: Mon Jan 11 21:37:54 2016 -0800
Committer: Reynold Xin 
Committed: Mon Jan 11 21:37:54 2016 -0800

--
 .../scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala   | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/112abf91/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala
--
diff --git 
a/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala 
b/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala
index e286aed..272f129 100644
--- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala
+++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala
@@ -357,7 +357,7 @@ object YarnSparkHadoopUtil {
*
* @return The correct OOM Error handler JVM option, platform dependent.
*/
-  def getOutOfMemoryErrorArgument : String = {
+  def getOutOfMemoryErrorArgument: String = {
 if (Utils.isWindows) {
   escapeForShell("-XX:OnOutOfMemoryError=taskkill /F /PID p")
 } else {


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org



spark git commit: [SPARK-12734][HOTFIX] Build changes must trigger all tests; clean after install in dep tests

2016-01-11 Thread joshrosen
Repository: spark
Updated Branches:
  refs/heads/branch-1.6 ce906b33d -> 3b32aa9e2


[SPARK-12734][HOTFIX] Build changes must trigger all tests; clean after install 
in dep tests

This patch fixes a build/test issue caused by the combination of #10672 and a 
latent issue in the original `dev/test-dependencies` script.

First, changes which _only_ touched build files were not triggering full 
Jenkins runs, making it possible for a build change to be merged even though it 
could cause failures in other tests. The `root` build module now depends on 
`build`, so all tests will now be run whenever a build-related file is changed.
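
As a rough sketch of the idea (illustrative only; the real logic lives in Python in dev/sparktestsupport and dev/run-tests.py, and the names below are made up), test selection walks module dependencies transitively, so a module that depends on `build` is re-tested whenever a build file changes:

  // Hypothetical Scala sketch of dependency-based test selection;
  // not the actual dev/sparktestsupport implementation.
  case class Module(name: String, dependencies: Seq[Module])

  def dependsOn(m: Module, target: String): Boolean =
    m.dependencies.exists(d => d.name == target || dependsOn(d, target))

  // Run a module's tests if it changed or if it depends on a changed module.
  def modulesToTest(all: Seq[Module], changed: Set[String]): Set[String] =
    all.filter(m => changed.contains(m.name) || changed.exists(dependsOn(m, _)))
      .map(_.name).toSet

  val build = Module("build", Nil)
  val root  = Module("root", Seq(build))  // root now depends on build
  // A build-only change now selects root, i.e. the full test run.
  assert(modulesToTest(Seq(build, root), Set("build")) == Set("build", "root"))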

I also added a `clean` step to the Maven install step in 
`dev/test-dependencies` in order to address an issue where the dummy JARs stuck 
around and caused "multiple assembly JARs found" errors in tests.

/cc zsxwing

Author: Josh Rosen 

Closes #10704 from JoshRosen/fix-build-test-problems.

(cherry picked from commit a44991453a43615028083ba9546f5cd93112f6bd)
Signed-off-by: Josh Rosen 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3b32aa9e
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3b32aa9e
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3b32aa9e

Branch: refs/heads/branch-1.6
Commit: 3b32aa9e29506606d4ca2407aa65a1aab8794805
Parents: ce906b3
Author: Josh Rosen 
Authored: Mon Jan 11 12:56:43 2016 -0800
Committer: Josh Rosen 
Committed: Mon Jan 11 12:58:07 2016 -0800

--
 dev/sparktestsupport/modules.py | 2 +-
 dev/test-dependencies.sh| 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/3b32aa9e/dev/sparktestsupport/modules.py
--
diff --git a/dev/sparktestsupport/modules.py b/dev/sparktestsupport/modules.py
index 47cd600..21667d5 100644
--- a/dev/sparktestsupport/modules.py
+++ b/dev/sparktestsupport/modules.py
@@ -435,7 +435,7 @@ yarn = Module(
 # No other modules should directly depend on this module.
 root = Module(
 name="root",
-dependencies=[],
+dependencies=[build],  # Changes to build should trigger all tests.
 source_file_regexes=[],
 # In order to run all of the tests, enable every test profile:
 build_profile_flags=list(set(

http://git-wip-us.apache.org/repos/asf/spark/blob/3b32aa9e/dev/test-dependencies.sh
--
diff --git a/dev/test-dependencies.sh b/dev/test-dependencies.sh
index 47ae15b..efb49f7 100755
--- a/dev/test-dependencies.sh
+++ b/dev/test-dependencies.sh
@@ -76,7 +76,7 @@ for HADOOP_PROFILE in "${HADOOP_PROFILES[@]}"; do
 HADOOP_MODULE_PROFILES="-Phive-thriftserver -Pyarn -Phive"
   fi
   echo "Performing Maven install for $HADOOP_PROFILE"
-  $MVN $HADOOP_MODULE_PROFILES -P$HADOOP_PROFILE jar:jar jar:test-jar 
install:install -q
+  $MVN $HADOOP_MODULE_PROFILES -P$HADOOP_PROFILE jar:jar jar:test-jar 
install:install clean -q
 
   echo "Performing Maven validate for $HADOOP_PROFILE"
   $MVN $HADOOP_MODULE_PROFILES -P$HADOOP_PROFILE validate -q


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org



spark git commit: [SPARK-12734][HOTFIX] Build changes must trigger all tests; clean after install in dep tests

2016-01-11 Thread joshrosen
Repository: spark
Updated Branches:
  refs/heads/branch-1.5 0e2aa4198 -> 6cc8f407d


[SPARK-12734][HOTFIX] Build changes must trigger all tests; clean after install 
in dep tests

This patch fixes a build/test issue caused by the combination of #10672 and a 
latent issue in the original `dev/test-dependencies` script.

First, changes which _only_ touched build files were not triggering full 
Jenkins runs, making it possible for a build change to be merged even though it 
could cause failures in other tests. The `root` build module now depends on 
`build`, so all tests will now be run whenever a build-related file is changed.

I also added a `clean` step to the Maven install step in 
`dev/test-dependencies` in order to address an issue where the dummy JARs stuck 
around and caused "multiple assembly JARs found" errors in tests.

/cc zsxwing

Author: Josh Rosen 

Closes #10704 from JoshRosen/fix-build-test-problems.

(cherry picked from commit a44991453a43615028083ba9546f5cd93112f6bd)
Signed-off-by: Josh Rosen 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/6cc8f407
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/6cc8f407
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/6cc8f407

Branch: refs/heads/branch-1.5
Commit: 6cc8f407d549ec74dc336d043a03149649553b3f
Parents: 0e2aa41
Author: Josh Rosen 
Authored: Mon Jan 11 12:56:43 2016 -0800
Committer: Josh Rosen 
Committed: Mon Jan 11 12:59:15 2016 -0800

--
 dev/sparktestsupport/modules.py | 2 +-
 dev/test-dependencies.sh| 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/6cc8f407/dev/sparktestsupport/modules.py
--
diff --git a/dev/sparktestsupport/modules.py b/dev/sparktestsupport/modules.py
index a412922..e5f15b5 100644
--- a/dev/sparktestsupport/modules.py
+++ b/dev/sparktestsupport/modules.py
@@ -413,7 +413,7 @@ ec2 = Module(
 # No other modules should directly depend on this module.
 root = Module(
 name="root",
-dependencies=[],
+dependencies=[build],  # Changes to build should trigger all tests.
 source_file_regexes=[],
 # In order to run all of the tests, enable every test profile:
 build_profile_flags=list(set(

http://git-wip-us.apache.org/repos/asf/spark/blob/6cc8f407/dev/test-dependencies.sh
--
diff --git a/dev/test-dependencies.sh b/dev/test-dependencies.sh
index 47ae15b..efb49f7 100755
--- a/dev/test-dependencies.sh
+++ b/dev/test-dependencies.sh
@@ -76,7 +76,7 @@ for HADOOP_PROFILE in "${HADOOP_PROFILES[@]}"; do
 HADOOP_MODULE_PROFILES="-Phive-thriftserver -Pyarn -Phive"
   fi
   echo "Performing Maven install for $HADOOP_PROFILE"
-  $MVN $HADOOP_MODULE_PROFILES -P$HADOOP_PROFILE jar:jar jar:test-jar 
install:install -q
+  $MVN $HADOOP_MODULE_PROFILES -P$HADOOP_PROFILE jar:jar jar:test-jar 
install:install clean -q
 
   echo "Performing Maven validate for $HADOOP_PROFILE"
   $MVN $HADOOP_MODULE_PROFILES -P$HADOOP_PROFILE validate -q


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org



spark git commit: [SPARK-12734][HOTFIX] Build changes must trigger all tests; clean after install in dep tests

2016-01-11 Thread joshrosen
Repository: spark
Updated Branches:
  refs/heads/master b313badaa -> a44991453


[SPARK-12734][HOTFIX] Build changes must trigger all tests; clean after install 
in dep tests

This patch fixes a build/test issue caused by the combination of #10672 and a 
latent issue in the original `dev/test-dependencies` script.

First, changes which _only_ touched build files were not triggering full 
Jenkins runs, making it possible for a build change to be merged even though it 
could cause failures in other tests. The `root` build module now depends on 
`build`, so all tests will now be run whenever a build-related file is changed.

I also added a `clean` step to the Maven install step in 
`dev/test-dependencies` in order to address an issue where the dummy JARs stuck 
around and caused "multiple assembly JARs found" errors in tests.

/cc zsxwing

Author: Josh Rosen 

Closes #10704 from JoshRosen/fix-build-test-problems.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a4499145
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a4499145
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a4499145

Branch: refs/heads/master
Commit: a44991453a43615028083ba9546f5cd93112f6bd
Parents: b313bad
Author: Josh Rosen 
Authored: Mon Jan 11 12:56:43 2016 -0800
Committer: Josh Rosen 
Committed: Mon Jan 11 12:56:43 2016 -0800

--
 dev/sparktestsupport/modules.py | 2 +-
 dev/test-dependencies.sh| 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/a4499145/dev/sparktestsupport/modules.py
--
diff --git a/dev/sparktestsupport/modules.py b/dev/sparktestsupport/modules.py
index 1fc6596..93a8c15 100644
--- a/dev/sparktestsupport/modules.py
+++ b/dev/sparktestsupport/modules.py
@@ -426,7 +426,7 @@ yarn = Module(
 # No other modules should directly depend on this module.
 root = Module(
 name="root",
-dependencies=[],
+dependencies=[build],  # Changes to build should trigger all tests.
 source_file_regexes=[],
 # In order to run all of the tests, enable every test profile:
 build_profile_flags=list(set(

http://git-wip-us.apache.org/repos/asf/spark/blob/a4499145/dev/test-dependencies.sh
--
diff --git a/dev/test-dependencies.sh b/dev/test-dependencies.sh
index def87aa..3cb5d2b 100755
--- a/dev/test-dependencies.sh
+++ b/dev/test-dependencies.sh
@@ -70,7 +70,7 @@ $MVN -q versions:set -DnewVersion=$TEMP_VERSION 
-DgenerateBackupPoms=false > /de
 # Generate manifests for each Hadoop profile:
 for HADOOP_PROFILE in "${HADOOP_PROFILES[@]}"; do
   echo "Performing Maven install for $HADOOP_PROFILE"
-  $MVN $HADOOP2_MODULE_PROFILES -P$HADOOP_PROFILE jar:jar jar:test-jar 
install:install -q
+  $MVN $HADOOP2_MODULE_PROFILES -P$HADOOP_PROFILE jar:jar jar:test-jar 
install:install clean -q
 
   echo "Performing Maven validate for $HADOOP_PROFILE"
   $MVN $HADOOP2_MODULE_PROFILES -P$HADOOP_PROFILE validate -q


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org



spark git commit: [SPARK-12744][SQL] Change parsing JSON integers to timestamps to treat integers as number of seconds

2016-01-11 Thread yhuai
Repository: spark
Updated Branches:
  refs/heads/master 8fe928b4f -> 9559ac5f7


[SPARK-12744][SQL] Change parsing JSON integers to timestamps to treat integers 
as number of seconds

JIRA: https://issues.apache.org/jira/browse/SPARK-12744

This PR makes parsing JSON integers to timestamps consistent with casting 
behavior.
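
As a usage sketch of the new behavior (the file name and value below are made up, not from the patch): when the user-supplied schema declares a TimestampType column, a JSON integer is now read as seconds since the Unix epoch, matching the CAST(long AS TIMESTAMP) semantics, instead of being treated as milliseconds:

  import org.apache.spark.SparkContext
  import org.apache.spark.sql.SQLContext
  import org.apache.spark.sql.types.{StructType, TimestampType}

  // Illustrative only: "events.json" is a hypothetical input file.
  val sc = new SparkContext("local[2]", "json-timestamp-example")
  val sqlContext = new SQLContext(sc)

  val schema = (new StructType).add("ts", TimestampType)
  // If events.json contains {"ts": 86400}, the value is now interpreted as
  // 86400 seconds after the epoch (1970-01-02 00:00:00 UTC), not 86400 ms.
  val events = sqlContext.read.schema(schema).json("events.json")
  events.show()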

Author: Anatoliy Plastinin 

Closes #10687 from antlypls/fix-json-timestamp-parsing.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/9559ac5f
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/9559ac5f
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/9559ac5f

Branch: refs/heads/master
Commit: 9559ac5f74434cf4bf611bdcde9a216d39799826
Parents: 8fe928b
Author: Anatoliy Plastinin 
Authored: Mon Jan 11 10:28:57 2016 -0800
Committer: Yin Huai 
Committed: Mon Jan 11 10:28:57 2016 -0800

--
 .../execution/datasources/json/JacksonParser.scala |  2 +-
 .../sql/execution/datasources/json/JsonSuite.scala | 17 +++--
 .../execution/datasources/json/TestJsonData.scala  |  4 
 3 files changed, 20 insertions(+), 3 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/9559ac5f/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JacksonParser.scala
--
diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JacksonParser.scala
 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JacksonParser.scala
index 2e3fe3d..b2f5c1e 100644
--- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JacksonParser.scala
+++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JacksonParser.scala
@@ -90,7 +90,7 @@ object JacksonParser {
 DateTimeUtils.stringToTime(parser.getText).getTime * 1000L
 
   case (VALUE_NUMBER_INT, TimestampType) =>
-parser.getLongValue * 1000L
+parser.getLongValue * 1000000L
 
   case (_, StringType) =>
 val writer = new ByteArrayOutputStream()

http://git-wip-us.apache.org/repos/asf/spark/blob/9559ac5f/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala
--
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala
 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala
index b3b6b7d..4ab1480 100644
--- 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala
+++ 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala
@@ -83,9 +83,9 @@ class JsonSuite extends QueryTest with SharedSQLContext with 
TestJsonData {
 val doubleNumber: Double = 1.7976931348623157E308d
 checkTypePromotion(doubleNumber.toDouble, enforceCorrectType(doubleNumber, 
DoubleType))
 
-checkTypePromotion(DateTimeUtils.fromJavaTimestamp(new 
Timestamp(intNumber)),
+checkTypePromotion(DateTimeUtils.fromJavaTimestamp(new Timestamp(intNumber 
* 1000L)),
 enforceCorrectType(intNumber, TimestampType))
-checkTypePromotion(DateTimeUtils.fromJavaTimestamp(new 
Timestamp(intNumber.toLong)),
+checkTypePromotion(DateTimeUtils.fromJavaTimestamp(new 
Timestamp(intNumber.toLong * 1000L)),
 enforceCorrectType(intNumber.toLong, TimestampType))
 val strTime = "2014-09-30 12:34:56"
 
checkTypePromotion(DateTimeUtils.fromJavaTimestamp(Timestamp.valueOf(strTime)),
@@ -1465,4 +1465,17 @@ class JsonSuite extends QueryTest with SharedSQLContext 
with TestJsonData {
 }
   }
 
+  test("Casting long as timestamp") {
+withTempTable("jsonTable") {
+  val schema = (new StructType).add("ts", TimestampType)
+  val jsonDF = sqlContext.read.schema(schema).json(timestampAsLong)
+
+  jsonDF.registerTempTable("jsonTable")
+
+  checkAnswer(
+sql("select ts from jsonTable"),
+Row(java.sql.Timestamp.valueOf("2016-01-02 03:04:05"))
+  )
+}
+  }
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/9559ac5f/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/TestJsonData.scala
--
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/TestJsonData.scala
 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/TestJsonData.scala
index cb61f7e..a083605 100644
--- 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/TestJsonData.scala
+++ 

spark git commit: [STREAMING][MINOR] Typo fixes

2016-01-11 Thread zsxwing
Repository: spark
Updated Branches:
  refs/heads/branch-1.6 d4cfd2acd -> ce906b33d


[STREAMING][MINOR] Typo fixes

Author: Jacek Laskowski 

Closes #10698 from jaceklaskowski/streaming-kafka-typo-fixes.

(cherry picked from commit b313badaa049f847f33663c61cd70ee2f2cbebac)
Signed-off-by: Shixiong Zhu 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ce906b33
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ce906b33
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ce906b33

Branch: refs/heads/branch-1.6
Commit: ce906b33de64f55653b52376316aa2625fd86b47
Parents: d4cfd2a
Author: Jacek Laskowski 
Authored: Mon Jan 11 11:29:15 2016 -0800
Committer: Shixiong Zhu 
Committed: Mon Jan 11 11:29:23 2016 -0800

--
 .../main/scala/org/apache/spark/streaming/kafka/KafkaCluster.scala | 2 +-
 .../src/main/scala/org/apache/spark/streaming/kafka/KafkaRDD.scala | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/ce906b33/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaCluster.scala
--
diff --git 
a/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaCluster.scala
 
b/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaCluster.scala
index 8465432..e3a2e57 100644
--- 
a/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaCluster.scala
+++ 
b/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaCluster.scala
@@ -382,7 +382,7 @@ object KafkaCluster {
 val seedBrokers: Array[(String, Int)] = brokers.split(",").map { hp =>
   val hpa = hp.split(":")
   if (hpa.size == 1) {
-throw new SparkException(s"Broker not the in correct format of 
: [$brokers]")
+throw new SparkException(s"Broker not in the correct format of 
: [$brokers]")
   }
   (hpa(0), hpa(1).toInt)
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/ce906b33/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaRDD.scala
--
diff --git 
a/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaRDD.scala 
b/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaRDD.scala
index ea5f842..4dbaf4f 100644
--- 
a/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaRDD.scala
+++ 
b/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaRDD.scala
@@ -156,7 +156,7 @@ class KafkaRDD[
 var requestOffset = part.fromOffset
 var iter: Iterator[MessageAndOffset] = null
 
-// The idea is to use the provided preferred host, except on task retry 
atttempts,
+// The idea is to use the provided preferred host, except on task retry 
attempts,
 // to minimize number of kafka metadata requests
 private def connectLeader: SimpleConsumer = {
   if (context.attemptNumber > 0) {


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org



spark git commit: [STREAMING][MINOR] Typo fixes

2016-01-11 Thread zsxwing
Repository: spark
Updated Branches:
  refs/heads/master 9559ac5f7 -> b313badaa


[STREAMING][MINOR] Typo fixes

Author: Jacek Laskowski 

Closes #10698 from jaceklaskowski/streaming-kafka-typo-fixes.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b313bada
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b313bada
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b313bada

Branch: refs/heads/master
Commit: b313badaa049f847f33663c61cd70ee2f2cbebac
Parents: 9559ac5
Author: Jacek Laskowski 
Authored: Mon Jan 11 11:29:15 2016 -0800
Committer: Shixiong Zhu 
Committed: Mon Jan 11 11:29:15 2016 -0800

--
 .../main/scala/org/apache/spark/streaming/kafka/KafkaCluster.scala | 2 +-
 .../src/main/scala/org/apache/spark/streaming/kafka/KafkaRDD.scala | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/b313bada/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaCluster.scala
--
diff --git 
a/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaCluster.scala
 
b/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaCluster.scala
index c4e18d9..d7885d7 100644
--- 
a/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaCluster.scala
+++ 
b/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaCluster.scala
@@ -385,7 +385,7 @@ object KafkaCluster {
 val seedBrokers: Array[(String, Int)] = brokers.split(",").map { hp =>
   val hpa = hp.split(":")
   if (hpa.size == 1) {
-throw new SparkException(s"Broker not the in correct format of 
: [$brokers]")
+throw new SparkException(s"Broker not in the correct format of 
: [$brokers]")
   }
   (hpa(0), hpa(1).toInt)
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/b313bada/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaRDD.scala
--
diff --git 
a/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaRDD.scala 
b/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaRDD.scala
index 603be22..4eb1556 100644
--- 
a/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaRDD.scala
+++ 
b/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaRDD.scala
@@ -156,7 +156,7 @@ class KafkaRDD[
 var requestOffset = part.fromOffset
 var iter: Iterator[MessageAndOffset] = null
 
-// The idea is to use the provided preferred host, except on task retry 
atttempts,
+// The idea is to use the provided preferred host, except on task retry 
attempts,
 // to minimize number of kafka metadata requests
 private def connectLeader: SimpleConsumer = {
   if (context.attemptNumber > 0) {


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org