git commit: [SPARK-2495][MLLIB] make KMeans constructor public
Repository: spark Updated Branches: refs/heads/branch-1.1 b82da3d69 - 69a17f119 [SPARK-2495][MLLIB] make KMeans constructor public to re-construct k-means models freeman-lab Author: Xiangrui Meng m...@databricks.com Closes #2112 from mengxr/public-constructors and squashes the following commits: 18d53a9 [Xiangrui Meng] make KMeans constructor public (cherry picked from commit 220f413686ae922bd11776576bf37610cce92c23) Signed-off-by: Xiangrui Meng m...@databricks.com Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/69a17f11 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/69a17f11 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/69a17f11 Branch: refs/heads/branch-1.1 Commit: 69a17f119758e786ef080cfbf52d484334c8d9d9 Parents: b82da3d Author: Xiangrui Meng m...@databricks.com Authored: Mon Aug 25 12:30:02 2014 -0700 Committer: Xiangrui Meng m...@databricks.com Committed: Mon Aug 25 12:30:15 2014 -0700 -- .../main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/69a17f11/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala -- diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala index 5823cb6..12a3d91 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala @@ -25,7 +25,7 @@ import org.apache.spark.mllib.linalg.Vector /** * A clustering model for K-means. Each point belongs to the cluster with the closest center. */ -class KMeansModel private[mllib] (val clusterCenters: Array[Vector]) extends Serializable { +class KMeansModel (val clusterCenters: Array[Vector]) extends Serializable { /** Total number of clusters. 
*/ def k: Int = clusterCenters.length - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
git commit: [SPARK-2495][MLLIB] make KMeans constructor public
Repository: spark Updated Branches: refs/heads/master fb0db7724 - 220f41368 [SPARK-2495][MLLIB] make KMeans constructor public to re-construct k-means models freeman-lab Author: Xiangrui Meng m...@databricks.com Closes #2112 from mengxr/public-constructors and squashes the following commits: 18d53a9 [Xiangrui Meng] make KMeans constructor public Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/220f4136 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/220f4136 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/220f4136 Branch: refs/heads/master Commit: 220f413686ae922bd11776576bf37610cce92c23 Parents: fb0db77 Author: Xiangrui Meng m...@databricks.com Authored: Mon Aug 25 12:30:02 2014 -0700 Committer: Xiangrui Meng m...@databricks.com Committed: Mon Aug 25 12:30:02 2014 -0700 -- .../main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/220f4136/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala -- diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala index 5823cb6..12a3d91 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala @@ -25,7 +25,7 @@ import org.apache.spark.mllib.linalg.Vector /** * A clustering model for K-means. Each point belongs to the cluster with the closest center. */ -class KMeansModel private[mllib] (val clusterCenters: Array[Vector]) extends Serializable { +class KMeansModel (val clusterCenters: Array[Vector]) extends Serializable { /** Total number of clusters. 
*/ def k: Int = clusterCenters.length - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
git commit: SPARK-2798 [BUILD] Correct several small errors in Flume module pom.xml files
Repository: spark Updated Branches: refs/heads/master 220f41368 - cd30db566 SPARK-2798 [BUILD] Correct several small errors in Flume module pom.xml files (EDIT) Since the scalatest issue was since resolved, this is now about a few small problems in the Flume Sink `pom.xml` - `scalatest` is not declared as a test-scope dependency - Its Avro version doesn't match the rest of the build - Its Flume version is not synced with the other Flume module - The other Flume module declares its dependency on Flume Sink slightly incorrectly, hard-coding the Scala 2.10 version - It depends on Scala Lang directly, which it shouldn't Author: Sean Owen so...@cloudera.com Closes #1726 from srowen/SPARK-2798 and squashes the following commits: a46e2c6 [Sean Owen] scalatest to test scope, harmonize Avro and Flume versions, remove direct Scala dependency, fix '2.10' in Flume dependency Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/cd30db56 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/cd30db56 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/cd30db56 Branch: refs/heads/master Commit: cd30db566a327ddf63cd242c758e46ce2d9479df Parents: 220f413 Author: Sean Owen so...@cloudera.com Authored: Mon Aug 25 13:29:07 2014 -0700 Committer: Tathagata Das tathagata.das1...@gmail.com Committed: Mon Aug 25 13:29:07 2014 -0700 -- external/flume-sink/pom.xml | 15 ++- external/flume/pom.xml | 12 ++-- pom.xml | 1 + 3 files changed, 13 insertions(+), 15 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/cd30db56/external/flume-sink/pom.xml -- diff --git a/external/flume-sink/pom.xml b/external/flume-sink/pom.xml index 19192e4..c1e8e65 100644 --- a/external/flume-sink/pom.xml +++ b/external/flume-sink/pom.xml @@ -25,19 +25,20 @@ relativePath../../pom.xml/relativePath /parent + groupIdorg.apache.spark/groupId artifactIdspark-streaming-flume-sink_2.10/artifactId properties 
sbt.project.namestreaming-flume-sink/sbt.project.name /properties - packagingjar/packaging nameSpark Project External Flume Sink/name urlhttp://spark.apache.org//url + dependencies dependency groupIdorg.apache.flume/groupId artifactIdflume-ng-sdk/artifactId - version1.4.0/version + version${flume.version}/version exclusions exclusion groupIdio.netty/groupId @@ -52,7 +53,7 @@ dependency groupIdorg.apache.flume/groupId artifactIdflume-ng-core/artifactId - version1.4.0/version + version${flume.version}/version exclusions exclusion groupIdio.netty/groupId @@ -62,11 +63,7 @@ groupIdorg.apache.thrift/groupId artifactIdlibthrift/artifactId /exclusion -/exclusions -/dependency -dependency - groupIdorg.scala-lang/groupId - artifactIdscala-library/artifactId + /exclusions /dependency dependency groupIdorg.scalatest/groupId @@ -97,7 +94,7 @@ plugin groupIdorg.apache.avro/groupId artifactIdavro-maven-plugin/artifactId -version1.7.3/version +version${avro.version}/version configuration !-- Generate the output in the same directory as the sbt-avro-plugin -- outputDirectory${project.basedir}/target/scala-${scala.binary.version}/src_managed/main/compiled_avro/outputDirectory http://git-wip-us.apache.org/repos/asf/spark/blob/cd30db56/external/flume/pom.xml -- diff --git a/external/flume/pom.xml b/external/flume/pom.xml index c532705..f71f6b6 100644 --- a/external/flume/pom.xml +++ b/external/flume/pom.xml @@ -42,6 +42,11 @@ /dependency dependency groupIdorg.apache.spark/groupId + artifactIdspark-streaming-flume-sink_${scala.binary.version}/artifactId + version${project.version}/version +/dependency +dependency + groupIdorg.apache.spark/groupId artifactIdspark-streaming_${scala.binary.version}/artifactId version${project.version}/version typetest-jar/type @@ -50,7 +55,7 @@ dependency groupIdorg.apache.flume/groupId artifactIdflume-ng-sdk/artifactId - version1.4.0/version + version${flume.version}/version exclusions exclusion groupIdio.netty/groupId @@ -82,11 +87,6 @@ 
artifactIdjunit-interface/artifactId scopetest/scope /dependency -dependency - groupIdorg.apache.spark/groupId - artifactIdspark-streaming-flume-sink_2.10/artifactId - version${project.version}/version -/dependency /dependencies build
git commit: [FIX] fix error message in sendMessageReliably
Repository: spark Updated Branches: refs/heads/master cc40a709c - fd8ace2d9 [FIX] fix error message in sendMessageReliably rxin Author: Xiangrui Meng m...@databricks.com Closes #2120 from mengxr/sendMessageReliably and squashes the following commits: b14400c [Xiangrui Meng] fix error message in sendMessageReliably Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/fd8ace2d Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/fd8ace2d Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/fd8ace2d Branch: refs/heads/master Commit: fd8ace2d9a796f69ce34ad202907008cd6e4d274 Parents: cc40a70 Author: Xiangrui Meng m...@databricks.com Authored: Mon Aug 25 14:55:20 2014 -0700 Committer: Josh Rosen joshro...@apache.org Committed: Mon Aug 25 14:55:20 2014 -0700 -- .../main/scala/org/apache/spark/network/ConnectionManager.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/fd8ace2d/core/src/main/scala/org/apache/spark/network/ConnectionManager.scala -- diff --git a/core/src/main/scala/org/apache/spark/network/ConnectionManager.scala b/core/src/main/scala/org/apache/spark/network/ConnectionManager.scala index e5e1e72..578d806 100644 --- a/core/src/main/scala/org/apache/spark/network/ConnectionManager.scala +++ b/core/src/main/scala/org/apache/spark/network/ConnectionManager.scala @@ -851,8 +851,8 @@ private[spark] class ConnectionManager( messageStatuses.synchronized { messageStatuses.remove(message.id).foreach ( s = { promise.failure( - new IOException(ssendMessageReliably failed because ack + -was not received within ${ackTimeout} sec)) + new IOException(sendMessageReliably failed because ack + +swas not received within $ackTimeout sec)) }) } } - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
git commit: [SPARK-3204][SQL] MaxOf would be foldable if both left and right are foldable.
Repository: spark Updated Branches: refs/heads/branch-1.1 8d33a6d3d - 19b01d6f7 [SPARK-3204][SQL] MaxOf would be foldable if both left and right are foldable. Author: Takuya UESHIN ues...@happy-camper.st Closes #2116 from ueshin/issues/SPARK-3204 and squashes the following commits: 7d9b107 [Takuya UESHIN] Make MaxOf foldable if both left and right are foldable. (cherry picked from commit d299e2bf2f6733a6267b7ce85e2b288608b17db3) Signed-off-by: Michael Armbrust mich...@databricks.com Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/19b01d6f Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/19b01d6f Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/19b01d6f Branch: refs/heads/branch-1.1 Commit: 19b01d6f79f2919257fcd14524bc8267c57eb3d9 Parents: 8d33a6d Author: Takuya UESHIN ues...@happy-camper.st Authored: Mon Aug 25 16:27:00 2014 -0700 Committer: Michael Armbrust mich...@databricks.com Committed: Mon Aug 25 16:27:17 2014 -0700 -- .../org/apache/spark/sql/catalyst/expressions/arithmetic.scala | 2 ++ 1 file changed, 2 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/19b01d6f/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala index 8d90614..5f8b6ae 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala @@ -89,6 +89,8 @@ case class Remainder(left: Expression, right: Expression) extends BinaryArithmet case class MaxOf(left: Expression, right: Expression) extends Expression { type EvaluatedType = Any + override def foldable = left.foldable right.foldable + override def nullable = left.nullable right.nullable 
override def children = left :: right :: Nil - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
git commit: [SPARK-2929][SQL] Refactored Thrift server and CLI suites
Repository: spark Updated Branches: refs/heads/master d299e2bf2 - cae9414d3 [SPARK-2929][SQL] Refactored Thrift server and CLI suites Removed most hard coded timeout, timing assumptions and all `Thread.sleep`. Simplified IPC and synchronization with `scala.sys.process` and future/promise so that the test suites can run more robustly and faster. Author: Cheng Lian lian.cs@gmail.com Closes #1856 from liancheng/thriftserver-tests and squashes the following commits: 2d914ca [Cheng Lian] Minor refactoring 0e12e71 [Cheng Lian] Cleaned up test output 0ee921d [Cheng Lian] Refactored Thrift server and CLI suites Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/cae9414d Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/cae9414d Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/cae9414d Branch: refs/heads/master Commit: cae9414d3805c6cf00eab6a6144d8f90cd0212f8 Parents: d299e2b Author: Cheng Lian lian.cs@gmail.com Authored: Mon Aug 25 16:29:59 2014 -0700 Committer: Michael Armbrust mich...@databricks.com Committed: Mon Aug 25 16:29:59 2014 -0700 -- .../spark/sql/hive/thriftserver/CliSuite.scala | 121 --- .../thriftserver/HiveThriftServer2Suite.scala | 212 +++ .../spark/sql/hive/thriftserver/TestUtils.scala | 108 -- 3 files changed, 217 insertions(+), 224 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/cae9414d/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala -- diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala index 70bea1e..3475c2c 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala @@ -18,41 +18,112 @@ package 
org.apache.spark.sql.hive.thriftserver -import java.io.{BufferedReader, InputStreamReader, PrintWriter} +import scala.collection.mutable.ArrayBuffer +import scala.concurrent.ExecutionContext.Implicits.global +import scala.concurrent.duration._ +import scala.concurrent.{Await, Future, Promise} +import scala.sys.process.{Process, ProcessLogger} + +import java.io._ +import java.util.concurrent.atomic.AtomicInteger import org.apache.hadoop.hive.conf.HiveConf.ConfVars import org.scalatest.{BeforeAndAfterAll, FunSuite} -class CliSuite extends FunSuite with BeforeAndAfterAll with TestUtils { - val WAREHOUSE_PATH = TestUtils.getWarehousePath(cli) - val METASTORE_PATH = TestUtils.getMetastorePath(cli) +import org.apache.spark.Logging +import org.apache.spark.sql.catalyst.util.getTempFilePath + +class CliSuite extends FunSuite with BeforeAndAfterAll with Logging { + def runCliWithin( + timeout: FiniteDuration, + extraArgs: Seq[String] = Seq.empty)( + queriesAndExpectedAnswers: (String, String)*) { + +val (queries, expectedAnswers) = queriesAndExpectedAnswers.unzip +val warehousePath = getTempFilePath(warehouse) +val metastorePath = getTempFilePath(metastore) +val cliScript = ../../bin/spark-sql.split(/).mkString(File.separator) - override def beforeAll() { -val jdbcUrl = sjdbc:derby:;databaseName=$METASTORE_PATH;create=true -val commands = - s../../bin/spark-sql +val command = { + val jdbcUrl = sjdbc:derby:;databaseName=$metastorePath;create=true + s$cliScript | --master local | --hiveconf ${ConfVars.METASTORECONNECTURLKEY}=$jdbcUrl - | --hiveconf ${ConfVars.METASTOREWAREHOUSE}=$WAREHOUSE_PATH - .stripMargin.split(\\s+) - -val pb = new ProcessBuilder(commands: _*) -process = pb.start() -outputWriter = new PrintWriter(process.getOutputStream, true) -inputReader = new BufferedReader(new InputStreamReader(process.getInputStream)) -errorReader = new BufferedReader(new InputStreamReader(process.getErrorStream)) -waitForOutput(inputReader, spark-sql) + | --hiveconf 
${ConfVars.METASTOREWAREHOUSE}=$warehousePath + .stripMargin.split(\\s+).toSeq ++ extraArgs +} + +// AtomicInteger is needed because stderr and stdout of the forked process are handled in +// different threads. +val next = new AtomicInteger(0) +val foundAllExpectedAnswers = Promise.apply[Unit]() +val queryStream = new ByteArrayInputStream(queries.mkString(\n).getBytes) +val buffer = new ArrayBuffer[String]() + +def captureOutput(source: String)(line: String) { + buffer += s$source $line + if (line.contains(expectedAnswers(next.get( { +if (next.incrementAndGet()
git commit: [SPARK-2929][SQL] Refactored Thrift server and CLI suites
Repository: spark Updated Branches: refs/heads/branch-1.1 19b01d6f7 - 292f28d4f [SPARK-2929][SQL] Refactored Thrift server and CLI suites Removed most hard coded timeout, timing assumptions and all `Thread.sleep`. Simplified IPC and synchronization with `scala.sys.process` and future/promise so that the test suites can run more robustly and faster. Author: Cheng Lian lian.cs@gmail.com Closes #1856 from liancheng/thriftserver-tests and squashes the following commits: 2d914ca [Cheng Lian] Minor refactoring 0e12e71 [Cheng Lian] Cleaned up test output 0ee921d [Cheng Lian] Refactored Thrift server and CLI suites (cherry picked from commit cae9414d3805c6cf00eab6a6144d8f90cd0212f8) Signed-off-by: Michael Armbrust mich...@databricks.com Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/292f28d4 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/292f28d4 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/292f28d4 Branch: refs/heads/branch-1.1 Commit: 292f28d4f7cbfdb8b90809926a6d69df7ed817e7 Parents: 19b01d6 Author: Cheng Lian lian.cs@gmail.com Authored: Mon Aug 25 16:29:59 2014 -0700 Committer: Michael Armbrust mich...@databricks.com Committed: Mon Aug 25 16:30:15 2014 -0700 -- .../spark/sql/hive/thriftserver/CliSuite.scala | 121 --- .../thriftserver/HiveThriftServer2Suite.scala | 212 +++ .../spark/sql/hive/thriftserver/TestUtils.scala | 108 -- 3 files changed, 217 insertions(+), 224 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/292f28d4/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala -- diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala index 70bea1e..3475c2c 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala +++ 
b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala @@ -18,41 +18,112 @@ package org.apache.spark.sql.hive.thriftserver -import java.io.{BufferedReader, InputStreamReader, PrintWriter} +import scala.collection.mutable.ArrayBuffer +import scala.concurrent.ExecutionContext.Implicits.global +import scala.concurrent.duration._ +import scala.concurrent.{Await, Future, Promise} +import scala.sys.process.{Process, ProcessLogger} + +import java.io._ +import java.util.concurrent.atomic.AtomicInteger import org.apache.hadoop.hive.conf.HiveConf.ConfVars import org.scalatest.{BeforeAndAfterAll, FunSuite} -class CliSuite extends FunSuite with BeforeAndAfterAll with TestUtils { - val WAREHOUSE_PATH = TestUtils.getWarehousePath(cli) - val METASTORE_PATH = TestUtils.getMetastorePath(cli) +import org.apache.spark.Logging +import org.apache.spark.sql.catalyst.util.getTempFilePath + +class CliSuite extends FunSuite with BeforeAndAfterAll with Logging { + def runCliWithin( + timeout: FiniteDuration, + extraArgs: Seq[String] = Seq.empty)( + queriesAndExpectedAnswers: (String, String)*) { + +val (queries, expectedAnswers) = queriesAndExpectedAnswers.unzip +val warehousePath = getTempFilePath(warehouse) +val metastorePath = getTempFilePath(metastore) +val cliScript = ../../bin/spark-sql.split(/).mkString(File.separator) - override def beforeAll() { -val jdbcUrl = sjdbc:derby:;databaseName=$METASTORE_PATH;create=true -val commands = - s../../bin/spark-sql +val command = { + val jdbcUrl = sjdbc:derby:;databaseName=$metastorePath;create=true + s$cliScript | --master local | --hiveconf ${ConfVars.METASTORECONNECTURLKEY}=$jdbcUrl - | --hiveconf ${ConfVars.METASTOREWAREHOUSE}=$WAREHOUSE_PATH - .stripMargin.split(\\s+) - -val pb = new ProcessBuilder(commands: _*) -process = pb.start() -outputWriter = new PrintWriter(process.getOutputStream, true) -inputReader = new BufferedReader(new InputStreamReader(process.getInputStream)) -errorReader = new 
BufferedReader(new InputStreamReader(process.getErrorStream)) -waitForOutput(inputReader, spark-sql) + | --hiveconf ${ConfVars.METASTOREWAREHOUSE}=$warehousePath + .stripMargin.split(\\s+).toSeq ++ extraArgs +} + +// AtomicInteger is needed because stderr and stdout of the forked process are handled in +// different threads. +val next = new AtomicInteger(0) +val foundAllExpectedAnswers = Promise.apply[Unit]() +val queryStream = new ByteArrayInputStream(queries.mkString(\n).getBytes) +val buffer = new ArrayBuffer[String]() + +def captureOutput(source: String)(line:
git commit: [SPARK-3058] [SQL] Support EXTENDED for EXPLAIN
Repository: spark Updated Branches: refs/heads/branch-1.1 292f28d4f - f8ac8ed7f [SPARK-3058] [SQL] Support EXTENDED for EXPLAIN Provide `extended` keyword support for `explain` command in SQL. e.g. ``` explain extended select key as a1, value as a2 from src where key=1; == Parsed Logical Plan == Project ['key AS a1#3,'value AS a2#4] Filter ('key = 1) UnresolvedRelation None, src, None == Analyzed Logical Plan == Project [key#8 AS a1#3,value#9 AS a2#4] Filter (CAST(key#8, DoubleType) = CAST(1, DoubleType)) MetastoreRelation default, src, None == Optimized Logical Plan == Project [key#8 AS a1#3,value#9 AS a2#4] Filter (CAST(key#8, DoubleType) = 1.0) MetastoreRelation default, src, None == Physical Plan == Project [key#8 AS a1#3,value#9 AS a2#4] Filter (CAST(key#8, DoubleType) = 1.0) HiveTableScan [key#8,value#9], (MetastoreRelation default, src, None), None Code Generation: false == RDD == (2) MappedRDD[14] at map at HiveContext.scala:350 MapPartitionsRDD[13] at mapPartitions at basicOperators.scala:42 MapPartitionsRDD[12] at mapPartitions at basicOperators.scala:57 MapPartitionsRDD[11] at mapPartitions at TableReader.scala:112 MappedRDD[10] at map at TableReader.scala:240 HadoopRDD[9] at HadoopRDD at TableReader.scala:230 ``` It's the sub task of #1847. But can go without any dependency. 
Author: Cheng Hao hao.ch...@intel.com Closes #1962 from chenghao-intel/explain_extended and squashes the following commits: 295db74 [Cheng Hao] Fix bug in printing the simple execution plan 48bc989 [Cheng Hao] Support EXTENDED for EXPLAIN (cherry picked from commit 156eb3966176de02ec3ec90ae10e50a7ebfbbf4f) Signed-off-by: Michael Armbrust mich...@databricks.com Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f8ac8ed7 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f8ac8ed7 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f8ac8ed7 Branch: refs/heads/branch-1.1 Commit: f8ac8ed7f88d2ee976b38d4a156f64efb3740650 Parents: 292f28d Author: Cheng Hao hao.ch...@intel.com Authored: Mon Aug 25 17:43:56 2014 -0700 Committer: Michael Armbrust mich...@databricks.com Committed: Mon Aug 25 17:44:27 2014 -0700 -- .../sql/catalyst/plans/logical/commands.scala | 2 +- .../scala/org/apache/spark/sql/SQLContext.scala | 13 +++-- .../spark/sql/execution/SparkStrategies.scala | 4 +- .../apache/spark/sql/execution/commands.scala | 10 ++-- .../org/apache/spark/sql/hive/HiveContext.scala | 2 +- .../org/apache/spark/sql/hive/HiveQl.scala | 5 +- .../sql/hive/execution/HiveExplainSuite.scala | 54 .../sql/hive/execution/HiveQuerySuite.scala | 2 +- 8 files changed, 78 insertions(+), 14 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/f8ac8ed7/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/commands.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/commands.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/commands.scala index 481a5a4..a01809c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/commands.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/commands.scala @@ -50,7 +50,7 @@ case class SetCommand(key: Option[String], value: 
Option[String]) extends Comman * Returned by a parser when the users only wants to see what query plan would be executed, without * actually performing the execution. */ -case class ExplainCommand(plan: LogicalPlan) extends Command { +case class ExplainCommand(plan: LogicalPlan, extended: Boolean = false) extends Command { override def output = Seq(AttributeReference(plan, StringType, nullable = false)()) } http://git-wip-us.apache.org/repos/asf/spark/blob/f8ac8ed7/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala index af9f7c6..8a9f4de 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala @@ -408,10 +408,18 @@ class SQLContext(@transient val sparkContext: SparkContext) protected def stringOrError[A](f: = A): String = try f.toString catch { case e: Throwable = e.toString } -def simpleString: String = stringOrError(executedPlan) +def simpleString: String = + s== Physical Plan == + |${stringOrError(executedPlan)} + override def toString: String = - s== Logical Plan == + // TODO
git commit: [SQL] logWarning should be logInfo in getResultSetSchema
Repository: spark Updated Branches: refs/heads/master 156eb3966 - 507a1b520 [SQL] logWarning should be logInfo in getResultSetSchema Author: wangfei wangfei_he...@126.com Closes #1939 from scwf/patch-5 and squashes the following commits: f952d10 [wangfei] [SQL] logWarning should be logInfo in getResultSetSchema Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/507a1b52 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/507a1b52 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/507a1b52 Branch: refs/heads/master Commit: 507a1b520063ad3e10b909767d9e3fd72d24415b Parents: 156eb39 Author: wangfei wangfei_he...@126.com Authored: Mon Aug 25 17:46:43 2014 -0700 Committer: Michael Armbrust mich...@databricks.com Committed: Mon Aug 25 17:46:43 2014 -0700 -- .../sql/hive/thriftserver/server/SparkSQLOperationManager.scala| 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/507a1b52/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/server/SparkSQLOperationManager.scala -- diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/server/SparkSQLOperationManager.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/server/SparkSQLOperationManager.scala index 699a110..6eccb1b 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/server/SparkSQLOperationManager.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/server/SparkSQLOperationManager.scala @@ -151,7 +151,7 @@ class SparkSQLOperationManager(hiveContext: HiveContext) extends OperationManage } def getResultSetSchema: TableSchema = { -logWarning(sResult Schema: ${result.queryExecution.analyzed.output}) +logInfo(sResult Schema: ${result.queryExecution.analyzed.output}) if (result.queryExecution.analyzed.output.size == 0) { new 
TableSchema(new FieldSchema(Result, string, ) :: Nil) } else { - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
git commit: [SPARK-3011][SQL] _temporary directory should be filtered out by sqlContext.parquetFile
Repository: spark Updated Branches: refs/heads/master 507a1b520 - 4243bb663 [SPARK-3011][SQL] _temporary directory should be filtered out by sqlContext.parquetFile fix compile error on hadoop 0.23 for the pull request #1924. Author: Chia-Yung Su chiay...@appier.com Closes #1959 from joesu/bugfix-spark3011 and squashes the following commits: be30793 [Chia-Yung Su] remove .* and _* except _metadata 8fe2398 [Chia-Yung Su] add note to explain 40ea9bd [Chia-Yung Su] fix hadoop-0.23 compile error c7e44f2 [Chia-Yung Su] match syntax f8fc32a [Chia-Yung Su] filter out tmp dir Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4243bb66 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4243bb66 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4243bb66 Branch: refs/heads/master Commit: 4243bb6634aca5b9ddf6d42778aa7b4866ce6256 Parents: 507a1b5 Author: Chia-Yung Su chiay...@appier.com Authored: Mon Aug 25 18:20:19 2014 -0700 Committer: Michael Armbrust mich...@databricks.com Committed: Mon Aug 25 18:20:19 2014 -0700 -- .../src/main/scala/org/apache/spark/sql/parquet/ParquetTypes.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/4243bb66/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTypes.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTypes.scala b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTypes.scala index c79a9ac..af8cd0a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTypes.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTypes.scala @@ -378,7 +378,7 @@ private[parquet] object ParquetTypesConverter extends Logging { val children = fs.listStatus(path).filterNot { status = val name = status.getPath.getName - name(0) == '.' || name == FileOutputCommitter.SUCCEEDED_FILE_NAME + (name(0) == '.' 
|| name(0) == '_') && name != ParquetFileWriter.PARQUET_METADATA_FILE } // NOTE (lian): Parquet _metadata file can be very slow if the file consists of lots of row - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
git commit: [SPARK-3011][SQL] _temporary directory should be filtered out by sqlContext.parquetFile
Repository: spark Updated Branches: refs/heads/branch-1.1 957b35657 - b5dc9b43b [SPARK-3011][SQL] _temporary directory should be filtered out by sqlContext.parquetFile fix compile error on hadoop 0.23 for the pull request #1924. Author: Chia-Yung Su chiay...@appier.com Closes #1959 from joesu/bugfix-spark3011 and squashes the following commits: be30793 [Chia-Yung Su] remove .* and _* except _metadata 8fe2398 [Chia-Yung Su] add note to explain 40ea9bd [Chia-Yung Su] fix hadoop-0.23 compile error c7e44f2 [Chia-Yung Su] match syntax f8fc32a [Chia-Yung Su] filter out tmp dir (cherry picked from commit 4243bb6634aca5b9ddf6d42778aa7b4866ce6256) Signed-off-by: Michael Armbrust mich...@databricks.com Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b5dc9b43 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b5dc9b43 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b5dc9b43 Branch: refs/heads/branch-1.1 Commit: b5dc9b43bcdcbdb5ffddbda6235443f3d7411b7a Parents: 957b356 Author: Chia-Yung Su chiay...@appier.com Authored: Mon Aug 25 18:20:19 2014 -0700 Committer: Michael Armbrust mich...@databricks.com Committed: Mon Aug 25 18:20:35 2014 -0700 -- .../src/main/scala/org/apache/spark/sql/parquet/ParquetTypes.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/b5dc9b43/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTypes.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTypes.scala b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTypes.scala index c79a9ac..af8cd0a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTypes.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTypes.scala @@ -378,7 +378,7 @@ private[parquet] object ParquetTypesConverter extends Logging { val children = fs.listStatus(path).filterNot { status = val name = 
status.getPath.getName - name(0) == '.' || name == FileOutputCommitter.SUCCEEDED_FILE_NAME + (name(0) == '.' || name(0) == '_') && name != ParquetFileWriter.PARQUET_METADATA_FILE } // NOTE (lian): Parquet _metadata file can be very slow if the file consists of lots of row - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
svn commit: r1620493 [2/2] - in /spark: screencasts/_posts/ site/ site/news/ site/releases/ site/screencasts/
Modified: spark/site/releases/spark-release-0-8-1.html URL: http://svn.apache.org/viewvc/spark/site/releases/spark-release-0-8-1.html?rev=1620493r1=1620492r2=1620493view=diff == --- spark/site/releases/spark-release-0-8-1.html (original) +++ spark/site/releases/spark-release-0-8-1.html Tue Aug 26 01:53:10 2014 @@ -163,7 +163,7 @@ pApache Spark 0.8.1 is a maintenance and performance release for the Scala 2.9 version of Spark. It also adds several new features, such as standalone mode high availability, that will appear in Spark 0.9 but developers wanted to have in Scala 2.9. Contributions to 0.8.1 came from 41 developers./p h3 id=yarn-22-supportYARN 2.2 Support/h3 -pSupport has been added for running Spark on YARN 2.2 and newer. Due to a change in the YARN API between previous versions and 2.2+, this was not supported in Spark 0.8.0. See the a href=/docs/0.8.1/running-on-yarn.htmlYARN documentation/a for specific instructions on how to build Spark for YARN 2.2+. We#8217;ve also included a pre-compiled binary for YARN 2.2./p +pSupport has been added for running Spark on YARN 2.2 and newer. Due to a change in the YARN API between previous versions and 2.2+, this was not supported in Spark 0.8.0. See the a href=/docs/0.8.1/running-on-yarn.htmlYARN documentation/a for specific instructions on how to build Spark for YARN 2.2+. We’ve also included a pre-compiled binary for YARN 2.2./p h3 id=high-availability-mode-for-standalone-cluster-managerHigh Availability Mode for Standalone Cluster Manager/h3 pThe standalone cluster manager now has a high availability (H/A) mode which can tolerate master failures. This is particularly useful for long-running applications such as streaming jobs and the shark server, where the scheduler master previously represented a single point of failure. Instructions for deploying H/A mode are included a href=/docs/0.8.1/spark-standalone.html#high-availabilityin the documentation/a.
The current implementation uses Zookeeper for coordination./p @@ -174,7 +174,7 @@ ul liOptimized hashtables for shuffle data - reduces memory and CPU consumption/li liEfficient encoding for JobConfs - improves latency for stages reading large numbers of blocks from HDFS, S3, and HBase/li - liShuffle file consolidation (off by default) - reduces the number of files created in large shuffles for better filesystem performance. This change works best on filesystems newer than ext3 (we recommend ext4 or XFS), and it will be the default in Spark 0.9, but we’ve left it off by default for compatibility. We recommend users turn this on unless they are using ext3 by setting codespark.shuffle.consolidateFiles/code to #8220;true#8221;./li + liShuffle file consolidation (off by default) - reduces the number of files created in large shuffles for better filesystem performance. This change works best on filesystems newer than ext3 (we recommend ext4 or XFS), and it will be the default in Spark 0.9, but we’ve left it off by default for compatibility.
We recommend users turn this on unless they are using ext3 by setting codespark.shuffle.consolidateFiles/code to “true”./li liTorrent broadcast (off by default) - a faster broadcast implementation for large objects./li liSupport for fetching large result sets - allows tasks to return large results without tuning Akka buffer sizes./li /ul @@ -211,47 +211,47 @@ h3 id=creditsCredits/h3 ul - liMichael Armbrust #8211; build fix/li - liPierre Borckmans #8211; typo fix in documentation/li - liEvan Chan #8211; codelocal:///code scheme for dependency jars/li - liEwen Cheslack-Postava #8211; codeadd/code method for python accumulators, support for setting config properties in python/li - liMosharaf Chowdhury #8211; optimized broadcast implementation/li - liFrank Dai #8211; documentation fix/li - liAaron Davidson #8211; shuffle file consolidation, H/A mode for standalone scheduler, cleaned up representation of block IDs, several improvements and bug fixes/li - liTathagata Das #8211; new streaming operators, fix for kafka concurrency bug/li - liAnkur Dave #8211; support for pausing spot clusters on EC2/li - liHarvey Feng #8211; optimization to JobConf broadcasts, bug fixes, YARN 2.2 build/li - liAli Ghodsi #8211; YARN 2.2 build/li - liThomas Graves #8211; Spark YARN integration including secure HDFS access over YARN/li - liLi Guoqiang #8211; fix for Maven build/li - liStephen Haberman #8211; bug fix/li - liHaidar Hadi #8211; documentation fix/li - liNathan Howell #8211; bug fix relating to YARN/li - liHolden Karau #8211; Java version of codemapPartitionsWithIndex/code/li - liDu Li #8211; bug fix in make-distrubion.sh/li - liRaymond Liu #8211; work on YARN 2.2 build/li - liXi Liu #8211; bug fix and code clean-up/li - liDavid McCauley #8211; bug fix in standalone mode JSON output/li - liMichael (wannabeast) #8211;
svn commit: r1620493 [1/2] - in /spark: screencasts/_posts/ site/ site/news/ site/releases/ site/screencasts/
Author: matei Date: Tue Aug 26 01:53:10 2014 New Revision: 1620493 URL: http://svn.apache.org/r1620493 Log: Updated screencast links to work over HTTPS too Modified: spark/screencasts/_posts/2013-04-10-1-first-steps-with-spark.md spark/screencasts/_posts/2013-04-11-2-spark-documentation-overview.md spark/screencasts/_posts/2013-04-16-3-transformations-and-caching.md spark/screencasts/_posts/2013-08-26-4-a-standalone-job-in-spark.md spark/site/downloads.html spark/site/news/amp-camp-2013-registration-ope.html spark/site/news/index.html spark/site/news/run-spark-and-shark-on-amazon-emr.html spark/site/news/spark-0-6-1-and-0-5-2-released.html spark/site/news/spark-0-7-0-released.html spark/site/news/spark-0-7-2-released.html spark/site/news/spark-0-7-3-released.html spark/site/news/spark-0-8-0-released.html spark/site/news/spark-0-8-1-released.html spark/site/news/spark-0-9-0-released.html spark/site/news/spark-1-0-0-released.html spark/site/news/spark-1-0-1-released.html spark/site/news/spark-and-shark-in-the-news.html spark/site/news/spark-becomes-tlp.html spark/site/news/spark-meetups.html spark/site/news/spark-user-survey-and-powered-by-page.html spark/site/news/strata-exercises-now-available-online.html spark/site/news/submit-talks-to-spark-summit-2014.html spark/site/news/two-weeks-to-spark-summit-2014.html spark/site/news/video-from-first-spark-development-meetup.html spark/site/releases/spark-release-0-3.html spark/site/releases/spark-release-0-5-0.html spark/site/releases/spark-release-0-5-1.html spark/site/releases/spark-release-0-6-0.html spark/site/releases/spark-release-0-7-0.html spark/site/releases/spark-release-0-8-0.html spark/site/releases/spark-release-0-8-1.html spark/site/releases/spark-release-0-9-0.html spark/site/releases/spark-release-0-9-1.html spark/site/releases/spark-release-0-9-2.html spark/site/releases/spark-release-1-0-0.html spark/site/releases/spark-release-1-0-1.html spark/site/releases/spark-release-1-0-2.html
spark/site/screencasts/1-first-steps-with-spark.html spark/site/screencasts/2-spark-documentation-overview.html spark/site/screencasts/3-transformations-and-caching.html spark/site/screencasts/4-a-standalone-job-in-spark.html Modified: spark/screencasts/_posts/2013-04-10-1-first-steps-with-spark.md URL: http://svn.apache.org/viewvc/spark/screencasts/_posts/2013-04-10-1-first-steps-with-spark.md?rev=1620493r1=1620492r2=1620493view=diff == --- spark/screencasts/_posts/2013-04-10-1-first-steps-with-spark.md (original) +++ spark/screencasts/_posts/2013-04-10-1-first-steps-with-spark.md Tue Aug 26 01:53:10 2014 @@ -18,7 +18,7 @@ This screencast marks the beginning of a liIntroduce the API using the Spark interactive shell to explore a file./li /ol -div class=video-container video-square shadowiframe width=755 height=705 src=http://www.youtube.com/embed/bWorBGOFBWY?autohide=0showinfo=0list=PL-x35fyliRwhKT-NpTKprPW1bkbdDcTTW; frameborder=0 allowfullscreen/iframe/div +div class=video-container video-square shadowiframe width=755 height=705 src=//www.youtube.com/embed/bWorBGOFBWY?autohide=0showinfo=0list=PL-x35fyliRwhKT-NpTKprPW1bkbdDcTTW frameborder=0 allowfullscreen/iframe/div Check out the next spark screencast in the series, a href={{site.url}}screencasts/2-spark-documentation-overview.htmlSpark Screencast #2 - Overview of Spark Documentation/a. Modified: spark/screencasts/_posts/2013-04-11-2-spark-documentation-overview.md URL: http://svn.apache.org/viewvc/spark/screencasts/_posts/2013-04-11-2-spark-documentation-overview.md?rev=1620493r1=1620492r2=1620493view=diff == --- spark/screencasts/_posts/2013-04-11-2-spark-documentation-overview.md (original) +++ spark/screencasts/_posts/2013-04-11-2-spark-documentation-overview.md Tue Aug 26 01:53:10 2014 @@ -10,7 +10,7 @@ published: true --- This is our 2nd Spark screencast. In it, we take a tour of the documentation available for Spark users online. 
-div class=video-container video-square shadowiframe width=755 height=705 src=http://www.youtube.com/embed/Dbqe_rv-NJQ?autohide=0showinfo=0list=PL-x35fyliRwhKT-NpTKprPW1bkbdDcTTW; frameborder=0 allowfullscreen/iframe/div +div class=video-container video-square shadowiframe width=755 height=705 src=//www.youtube.com/embed/Dbqe_rv-NJQ?autohide=0showinfo=0list=PL-x35fyliRwhKT-NpTKprPW1bkbdDcTTW frameborder=0 allowfullscreen/iframe/div Check out the next spark screencast in the series, a href={{site.url}}screencasts/3-transformations-and-caching.htmlSpark Screencast #3 - Transformations and Caching/a. Modified: spark/screencasts/_posts/2013-04-16-3-transformations-and-caching.md URL:
git commit: [Spark-3222] [SQL] Cross join support in HiveQL
Repository: spark Updated Branches: refs/heads/master 62f5009f6 - 52fbdc2de [Spark-3222] [SQL] Cross join support in HiveQL We can simple treat cross join as inner join without join conditions. Author: Daoyuan Wang daoyuan.w...@intel.com Author: adrian-wang daoyuanw...@gmail.com Closes #2124 from adrian-wang/crossjoin and squashes the following commits: 8c9b7c5 [Daoyuan Wang] add a test 7d47bbb [adrian-wang] add cross join support for hql Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/52fbdc2d Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/52fbdc2d Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/52fbdc2d Branch: refs/heads/master Commit: 52fbdc2deddcdba02bf5945a36e15870021ec890 Parents: 62f5009 Author: Daoyuan Wang daoyuan.w...@intel.com Authored: Mon Aug 25 22:56:35 2014 -0700 Committer: Michael Armbrust mich...@databricks.com Committed: Mon Aug 25 22:56:35 2014 -0700 -- .../apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala| 1 + sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala | 1 + .../resources/golden/cross_join-0-7e4af1870bc73decae43b3383c7d2046 | 0 .../resources/golden/cross_join-1-1a96761bf3e47ace9a422ed58273ff35 | 0 .../resources/golden/cross_join-2-85c93a81eae05bf56a04a904bb80a229 | 0 5 files changed, 2 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/52fbdc2d/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala -- diff --git a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala index 210753e..6624387 100644 --- a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala +++ b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala @@ 
-310,6 +310,7 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { create_nested_type, create_skewed_table1, create_struct_table, +cross_join, ct_case_insensitive, database_location, database_properties, http://git-wip-us.apache.org/repos/asf/spark/blob/52fbdc2d/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala -- diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala index 5da6e8d..581332e 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala @@ -772,6 +772,7 @@ private[hive] object HiveQl { val joinType = joinToken match { case TOK_JOIN = Inner +case TOK_CROSSJOIN = Inner case TOK_RIGHTOUTERJOIN = RightOuter case TOK_LEFTOUTERJOIN = LeftOuter case TOK_FULLOUTERJOIN = FullOuter http://git-wip-us.apache.org/repos/asf/spark/blob/52fbdc2d/sql/hive/src/test/resources/golden/cross_join-0-7e4af1870bc73decae43b3383c7d2046 -- diff --git a/sql/hive/src/test/resources/golden/cross_join-0-7e4af1870bc73decae43b3383c7d2046 b/sql/hive/src/test/resources/golden/cross_join-0-7e4af1870bc73decae43b3383c7d2046 new file mode 100644 index 000..e69de29 http://git-wip-us.apache.org/repos/asf/spark/blob/52fbdc2d/sql/hive/src/test/resources/golden/cross_join-1-1a96761bf3e47ace9a422ed58273ff35 -- diff --git a/sql/hive/src/test/resources/golden/cross_join-1-1a96761bf3e47ace9a422ed58273ff35 b/sql/hive/src/test/resources/golden/cross_join-1-1a96761bf3e47ace9a422ed58273ff35 new file mode 100644 index 000..e69de29 http://git-wip-us.apache.org/repos/asf/spark/blob/52fbdc2d/sql/hive/src/test/resources/golden/cross_join-2-85c93a81eae05bf56a04a904bb80a229 -- diff --git a/sql/hive/src/test/resources/golden/cross_join-2-85c93a81eae05bf56a04a904bb80a229 b/sql/hive/src/test/resources/golden/cross_join-2-85c93a81eae05bf56a04a904bb80a229 new file mode 100644 index 000..e69de29 - To 
unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
git commit: [Spark-3222] [SQL] Cross join support in HiveQL
Repository: spark Updated Branches: refs/heads/branch-1.1 4d6a0e920 - 48a07490f [Spark-3222] [SQL] Cross join support in HiveQL We can simple treat cross join as inner join without join conditions. Author: Daoyuan Wang daoyuan.w...@intel.com Author: adrian-wang daoyuanw...@gmail.com Closes #2124 from adrian-wang/crossjoin and squashes the following commits: 8c9b7c5 [Daoyuan Wang] add a test 7d47bbb [adrian-wang] add cross join support for hql (cherry picked from commit 52fbdc2deddcdba02bf5945a36e15870021ec890) Signed-off-by: Michael Armbrust mich...@databricks.com Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/48a07490 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/48a07490 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/48a07490 Branch: refs/heads/branch-1.1 Commit: 48a07490fdd0e79a34e66e5c1baad0b1558bbda5 Parents: 4d6a0e9 Author: Daoyuan Wang daoyuan.w...@intel.com Authored: Mon Aug 25 22:56:35 2014 -0700 Committer: Michael Armbrust mich...@databricks.com Committed: Mon Aug 25 22:56:53 2014 -0700 -- .../apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala| 1 + sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala | 1 + .../resources/golden/cross_join-0-7e4af1870bc73decae43b3383c7d2046 | 0 .../resources/golden/cross_join-1-1a96761bf3e47ace9a422ed58273ff35 | 0 .../resources/golden/cross_join-2-85c93a81eae05bf56a04a904bb80a229 | 0 5 files changed, 2 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/48a07490/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala -- diff --git a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala index 210753e..6624387 100644 --- 
a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala +++ b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala @@ -310,6 +310,7 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { create_nested_type, create_skewed_table1, create_struct_table, +cross_join, ct_case_insensitive, database_location, database_properties, http://git-wip-us.apache.org/repos/asf/spark/blob/48a07490/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala -- diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala index 5da6e8d..581332e 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala @@ -772,6 +772,7 @@ private[hive] object HiveQl { val joinType = joinToken match { case TOK_JOIN = Inner +case TOK_CROSSJOIN = Inner case TOK_RIGHTOUTERJOIN = RightOuter case TOK_LEFTOUTERJOIN = LeftOuter case TOK_FULLOUTERJOIN = FullOuter http://git-wip-us.apache.org/repos/asf/spark/blob/48a07490/sql/hive/src/test/resources/golden/cross_join-0-7e4af1870bc73decae43b3383c7d2046 -- diff --git a/sql/hive/src/test/resources/golden/cross_join-0-7e4af1870bc73decae43b3383c7d2046 b/sql/hive/src/test/resources/golden/cross_join-0-7e4af1870bc73decae43b3383c7d2046 new file mode 100644 index 000..e69de29 http://git-wip-us.apache.org/repos/asf/spark/blob/48a07490/sql/hive/src/test/resources/golden/cross_join-1-1a96761bf3e47ace9a422ed58273ff35 -- diff --git a/sql/hive/src/test/resources/golden/cross_join-1-1a96761bf3e47ace9a422ed58273ff35 b/sql/hive/src/test/resources/golden/cross_join-1-1a96761bf3e47ace9a422ed58273ff35 new file mode 100644 index 000..e69de29 http://git-wip-us.apache.org/repos/asf/spark/blob/48a07490/sql/hive/src/test/resources/golden/cross_join-2-85c93a81eae05bf56a04a904bb80a229 -- diff --git 
a/sql/hive/src/test/resources/golden/cross_join-2-85c93a81eae05bf56a04a904bb80a229 b/sql/hive/src/test/resources/golden/cross_join-2-85c93a81eae05bf56a04a904bb80a229 new file mode 100644 index 000..e69de29 - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org