spark git commit: [SPARK-17022][YARN] Handle potential deadlock in driver handling messages
Repository: spark Updated Branches: refs/heads/master 4ec5c360c -> ea0bf91b4 [SPARK-17022][YARN] Handle potential deadlock in driver handling messages ## What changes were proposed in this pull request? We directly send RequestExecutors to AM instead of transfer it to yarnShedulerBackend first, to avoid potential deadlock. ## How was this patch tested? manual tests Author: WangTaoTheTonic Closes #14605 from WangTaoTheTonic/lock. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ea0bf91b Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ea0bf91b Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ea0bf91b Branch: refs/heads/master Commit: ea0bf91b4a2ca3ef472906e50e31fd6268b6f53e Parents: 4ec5c36 Author: WangTaoTheTonic Authored: Thu Aug 11 15:09:23 2016 -0700 Committer: Marcelo Vanzin Committed: Thu Aug 11 15:09:23 2016 -0700 -- .../scheduler/cluster/YarnSchedulerBackend.scala | 18 +++--- 1 file changed, 15 insertions(+), 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/ea0bf91b/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnSchedulerBackend.scala -- diff --git a/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnSchedulerBackend.scala b/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnSchedulerBackend.scala index 6b3c831..ea63ff5 100644 --- a/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnSchedulerBackend.scala +++ b/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnSchedulerBackend.scala @@ -125,8 +125,20 @@ private[spark] abstract class YarnSchedulerBackend( * This includes executors already pending or running. */ override def doRequestTotalExecutors(requestedTotal: Int): Boolean = { -yarnSchedulerEndpointRef.askWithRetry[Boolean]( - RequestExecutors(requestedTotal, localityAwareTasks, hostToLocalTaskCount)) +val r = RequestExecutors(requestedTotal, localityAwareTasks, hostToLocalTaskCount) +yarnSchedulerEndpoint.amEndpoint match { + case Some(am) => +try { + am.askWithRetry[Boolean](r) +} catch { + case NonFatal(e) => +logError(s"Sending $r to AM was unsuccessful", e) +return false +} + case None => +logWarning("Attempted to request executors before the AM has registered!") +return false +} } /** @@ -209,7 +221,7 @@ private[spark] abstract class YarnSchedulerBackend( */ private class YarnSchedulerEndpoint(override val rpcEnv: RpcEnv) extends ThreadSafeRpcEndpoint with Logging { -private var amEndpoint: Option[RpcEndpointRef] = None +var amEndpoint: Option[RpcEndpointRef] = None private val askAmThreadPool = ThreadUtils.newDaemonCachedThreadPool("yarn-scheduler-ask-am-thread-pool") - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
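The change in context: rather than relaying `RequestExecutors` through the scheduler's own RPC endpoint (which can deadlock if that endpoint is busy handling other messages), the driver now asks the AM endpoint directly and degrades gracefully when the AM is unavailable. A minimal, self-contained sketch of that pattern — `RpcEndpointRef` and `RequestExecutors` below are stand-ins, not Spark's real internals:

```scala
import scala.util.control.NonFatal

// Stand-ins for the Spark internals involved (illustrative only).
trait RpcEndpointRef { def askWithRetry[T](message: Any): T }
case class RequestExecutors(requestedTotal: Int)

def doRequestTotalExecutors(amEndpoint: Option[RpcEndpointRef], requestedTotal: Int): Boolean = {
  val r = RequestExecutors(requestedTotal)
  amEndpoint match {
    case Some(am) =>
      try {
        // Ask the AM directly; a slow or failed reply no longer ties up the scheduler endpoint.
        am.askWithRetry[Boolean](r)
      } catch {
        case NonFatal(e) =>
          Console.err.println(s"Sending $r to AM was unsuccessful: $e")
          false
      }
    case None =>
      Console.err.println("Attempted to request executors before the AM has registered!")
      false
  }
}
```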
spark git commit: [SPARK-13602][CORE] Add shutdown hook to DriverRunner to prevent driver process leak
Repository: spark Updated Branches: refs/heads/master cf9367826 -> 1c9a386c6 [SPARK-13602][CORE] Add shutdown hook to DriverRunner to prevent driver process leak ## What changes were proposed in this pull request? Added shutdown hook to DriverRunner to kill the driver process in case the Worker JVM exits suddenly and the `WorkerWatcher` was unable to properly catch this. Did some cleanup to consolidate driver state management and setting of finalized vars within the running thread. ## How was this patch tested? Added unit tests to verify that final state and exception variables are set accordingly for successfull, failed, and errors in the driver process. Retrofitted existing test to verify killing of mocked process ends with the correct state and stops properly Manually tested (with deploy-mode=cluster) that the shutdown hook is called by forcibly exiting the `Worker` and various points in the code with the `WorkerWatcher` both disabled and enabled. Also, manually killed the driver through the ui and verified that the `DriverRunner` interrupted, killed the process and exited properly. Author: Bryan Cutler Closes #11746 from BryanCutler/DriverRunner-shutdown-hook-SPARK-13602. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1c9a386c Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1c9a386c Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1c9a386c Branch: refs/heads/master Commit: 1c9a386c6b6812a3931f3fb0004249894a01f657 Parents: cf93678 Author: Bryan Cutler Authored: Thu Aug 11 14:49:11 2016 -0700 Committer: Marcelo Vanzin Committed: Thu Aug 11 14:49:11 2016 -0700 -- .../spark/deploy/worker/DriverRunner.scala | 119 +++ .../spark/deploy/worker/DriverRunnerTest.scala | 73 +++- 2 files changed, 142 insertions(+), 50 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/1c9a386c/core/src/main/scala/org/apache/spark/deploy/worker/DriverRunner.scala -- diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/DriverRunner.scala b/core/src/main/scala/org/apache/spark/deploy/worker/DriverRunner.scala index f4376de..289b0b9 100644 --- a/core/src/main/scala/org/apache/spark/deploy/worker/DriverRunner.scala +++ b/core/src/main/scala/org/apache/spark/deploy/worker/DriverRunner.scala @@ -32,7 +32,7 @@ import org.apache.spark.deploy.master.DriverState import org.apache.spark.deploy.master.DriverState.DriverState import org.apache.spark.internal.Logging import org.apache.spark.rpc.RpcEndpointRef -import org.apache.spark.util.{Clock, SystemClock, Utils} +import org.apache.spark.util.{Clock, ShutdownHookManager, SystemClock, Utils} /** * Manages the execution of one driver, including automatically restarting the driver on failure. @@ -53,9 +53,11 @@ private[deploy] class DriverRunner( @volatile private var killed = false // Populated once finished - private[worker] var finalState: Option[DriverState] = None - private[worker] var finalException: Option[Exception] = None - private var finalExitCode: Option[Int] = None + @volatile private[worker] var finalState: Option[DriverState] = None + @volatile private[worker] var finalException: Option[Exception] = None + + // Timeout to wait for when trying to terminate a driver. 
+ private val DRIVER_TERMINATE_TIMEOUT_MS = 10 * 1000 // Decoupled for testing def setClock(_clock: Clock): Unit = { @@ -78,49 +80,53 @@ private[deploy] class DriverRunner( private[worker] def start() = { new Thread("DriverRunner for " + driverId) { override def run() { +var shutdownHook: AnyRef = null try { - val driverDir = createWorkingDirectory() - val localJarFilename = downloadUserJar(driverDir) - - def substituteVariables(argument: String): String = argument match { -case "{{WORKER_URL}}" => workerUrl -case "{{USER_JAR}}" => localJarFilename -case other => other + shutdownHook = ShutdownHookManager.addShutdownHook { () => +logInfo(s"Worker shutting down, killing driver $driverId") +kill() } - // TODO: If we add ability to submit multiple jars they should also be added here - val builder = CommandUtils.buildProcessBuilder(driverDesc.command, securityManager, -driverDesc.mem, sparkHome.getAbsolutePath, substituteVariables) - launchDriver(builder, driverDir, driverDesc.supervise) -} -catch { - case e: Exception => finalException = Some(e) -} + // prepare driver jars and run driver + val exitCode = prepareAndRunDriver() -val state = - if (killed) { -
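The heart of the patch is the shutdown hook: if the Worker JVM exits before the driver process has been reaped, the hook kills it so the process does not leak. Below is a self-contained sketch using a plain JVM shutdown hook rather than Spark's `ShutdownHookManager`; the 10-second grace period mirrors the `DRIVER_TERMINATE_TIMEOUT_MS` constant the patch introduces, and the forced-destroy fallback is an assumption of this sketch.

```scala
import java.util.concurrent.TimeUnit

// Launch a child process and ensure it is killed if this JVM exits first.
def launchWithShutdownHook(builder: ProcessBuilder, driverId: String): Process = {
  val process = builder.start()
  val hook = new Thread(new Runnable {
    override def run(): Unit = {
      if (process.isAlive) {
        println(s"Worker shutting down, killing driver $driverId")
        process.destroy()
        // Grace period before terminating forcibly (mirrors DRIVER_TERMINATE_TIMEOUT_MS).
        if (!process.waitFor(10, TimeUnit.SECONDS)) {
          process.destroyForcibly()
        }
      }
    }
  })
  Runtime.getRuntime.addShutdownHook(hook)
  process
}
```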
spark git commit: [SPARK-17018][SQL] literals.sql for testing literal parsing
Repository: spark Updated Branches: refs/heads/branch-2.0 6bf20cd94 -> bc683f037 [SPARK-17018][SQL] literals.sql for testing literal parsing ## What changes were proposed in this pull request? This patch adds literals.sql for testing literal parsing end-to-end in SQL. ## How was this patch tested? The patch itself is only about adding test cases. Author: petermaxlee Closes #14598 from petermaxlee/SPARK-17018-2. (cherry picked from commit cf9367826c38e5f34ae69b409f5d09c55ed1d319) Signed-off-by: Reynold Xin Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/bc683f03 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/bc683f03 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/bc683f03 Branch: refs/heads/branch-2.0 Commit: bc683f037d4e84f2a42eb7b1aaa9e0e4fd5f833a Parents: 6bf20cd Author: petermaxlee Authored: Thu Aug 11 13:55:10 2016 -0700 Committer: Reynold Xin Committed: Thu Aug 11 13:55:17 2016 -0700 -- .../resources/sql-tests/inputs/literals.sql | 92 + .../sql-tests/inputs/number-format.sql | 16 - .../sql-tests/results/literals.sql.out | 374 +++ .../sql-tests/results/number-format.sql.out | 42 --- .../apache/spark/sql/SQLQueryTestSuite.scala| 14 +- 5 files changed, 476 insertions(+), 62 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/bc683f03/sql/core/src/test/resources/sql-tests/inputs/literals.sql -- diff --git a/sql/core/src/test/resources/sql-tests/inputs/literals.sql b/sql/core/src/test/resources/sql-tests/inputs/literals.sql new file mode 100644 index 000..62f0d3d --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/literals.sql @@ -0,0 +1,92 @@ +-- Literal parsing + +-- null +select null, Null, nUll; + +-- boolean +select true, tRue, false, fALse; + +-- byte (tinyint) +select 1Y; +select 127Y, -128Y; + +-- out of range byte +select 128Y; + +-- short (smallint) +select 1S; +select 32767S, -32768S; + +-- out of range short +select 32768S; + +-- long (bigint) +select 1L, 2147483648L; +select 9223372036854775807L, -9223372036854775808L; + +-- out of range long +select 9223372036854775808L; + +-- integral parsing + +-- parse int +select 1, -1; + +-- parse int max and min value as int +select 2147483647, -2147483648; + +-- parse long max and min value as long +select 9223372036854775807, -9223372036854775808; + +-- parse as decimals (Long.MaxValue + 1, and Long.MinValue - 1) +select 9223372036854775808, -9223372036854775809; + +-- out of range decimal numbers +select 1234567890123456789012345678901234567890; +select 1234567890123456789012345678901234567890.0; + +-- double +select 1D, 1.2D, 1e10, 1.5e5, .10D, 0.10D, .1e5, .9e+2, 0.9e+2, 900e-1, 9.e+1; +select -1D, -1.2D, -1e10, -1.5e5, -.10D, -0.10D, -.1e5; +-- negative double +select .e3; +-- inf and -inf +select 1E309, -1E309; + +-- decimal parsing +select 0.3, -0.8, .5, -.18, 0., .; + +-- super large scientific notation numbers should still be valid doubles +select 123456789012345678901234567890123456789e10, 123456789012345678901234567890123456789.1e10; + +-- string +select "Hello Peter!", 'hello lee!'; +-- multi string +select 'hello' 'world', 'hello' " " 'lee'; +-- single quote within double quotes +select "hello 'peter'"; +select 'pattern%', 'no-pattern\%', 'pattern\\%', 'pattern\\\%'; +select '\'', '"', '\n', '\r', '\t', 'Z'; +-- "Hello!" 
in octals +select '\110\145\154\154\157\041'; +-- "World :)" in unicode +select '\u0057\u006F\u0072\u006C\u0064\u0020\u003A\u0029'; + +-- date +select dAte '2016-03-12'; +-- invalid date +select date 'mar 11 2016'; + +-- timestamp +select tImEstAmp '2016-03-11 20:54:00.000'; +-- invalid timestamp +select timestamp '2016-33-11 20:54:00.000'; + +-- interval +select interval 13.123456789 seconds, interval -13.123456789 second; +select interval 1 year 2 month 3 week 4 day 5 hour 6 minute 7 seconds 8 millisecond, 9 microsecond; +-- ns is not supported +select interval 10 nanoseconds; + +-- unsupported data type +select GEO '(10,-6)'; http://git-wip-us.apache.org/repos/asf/spark/blob/bc683f03/sql/core/src/test/resources/sql-tests/inputs/number-format.sql -- diff --git a/sql/core/src/test/resources/sql-tests/inputs/number-format.sql b/sql/core/src/test/resources/sql-tests/inputs/number-format.sql deleted file mode 100644 index a32d068..000 --- a/sql/core/src/test/resources/sql-tests/inputs/number-format.sql +++ /dev/null @@ -1,16 +0,0 @@ --- Verifies how we parse numbers - --- parse as ints -select 1, -1; - --- parse as longs (Int.MaxValue + 1, and Int.MinValue - 1) -select 2147483648, -2147483649; - --- parse long min and max value -select 9223372036
spark git commit: [SPARK-17018][SQL] literals.sql for testing literal parsing
Repository: spark Updated Branches: refs/heads/master acaf2a81a -> cf9367826 [SPARK-17018][SQL] literals.sql for testing literal parsing ## What changes were proposed in this pull request? This patch adds literals.sql for testing literal parsing end-to-end in SQL. ## How was this patch tested? The patch itself is only about adding test cases. Author: petermaxlee Closes #14598 from petermaxlee/SPARK-17018-2. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/cf936782 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/cf936782 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/cf936782 Branch: refs/heads/master Commit: cf9367826c38e5f34ae69b409f5d09c55ed1d319 Parents: acaf2a8 Author: petermaxlee Authored: Thu Aug 11 13:55:10 2016 -0700 Committer: Reynold Xin Committed: Thu Aug 11 13:55:10 2016 -0700 -- .../resources/sql-tests/inputs/literals.sql | 92 + .../sql-tests/inputs/number-format.sql | 16 - .../sql-tests/results/literals.sql.out | 374 +++ .../sql-tests/results/number-format.sql.out | 42 --- .../apache/spark/sql/SQLQueryTestSuite.scala| 14 +- 5 files changed, 476 insertions(+), 62 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/cf936782/sql/core/src/test/resources/sql-tests/inputs/literals.sql -- diff --git a/sql/core/src/test/resources/sql-tests/inputs/literals.sql b/sql/core/src/test/resources/sql-tests/inputs/literals.sql new file mode 100644 index 000..62f0d3d --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/literals.sql @@ -0,0 +1,92 @@ +-- Literal parsing + +-- null +select null, Null, nUll; + +-- boolean +select true, tRue, false, fALse; + +-- byte (tinyint) +select 1Y; +select 127Y, -128Y; + +-- out of range byte +select 128Y; + +-- short (smallint) +select 1S; +select 32767S, -32768S; + +-- out of range short +select 32768S; + +-- long (bigint) +select 1L, 2147483648L; +select 9223372036854775807L, -9223372036854775808L; + +-- out of range long +select 9223372036854775808L; + +-- integral parsing + +-- parse int +select 1, -1; + +-- parse int max and min value as int +select 2147483647, -2147483648; + +-- parse long max and min value as long +select 9223372036854775807, -9223372036854775808; + +-- parse as decimals (Long.MaxValue + 1, and Long.MinValue - 1) +select 9223372036854775808, -9223372036854775809; + +-- out of range decimal numbers +select 1234567890123456789012345678901234567890; +select 1234567890123456789012345678901234567890.0; + +-- double +select 1D, 1.2D, 1e10, 1.5e5, .10D, 0.10D, .1e5, .9e+2, 0.9e+2, 900e-1, 9.e+1; +select -1D, -1.2D, -1e10, -1.5e5, -.10D, -0.10D, -.1e5; +-- negative double +select .e3; +-- inf and -inf +select 1E309, -1E309; + +-- decimal parsing +select 0.3, -0.8, .5, -.18, 0., .; + +-- super large scientific notation numbers should still be valid doubles +select 123456789012345678901234567890123456789e10, 123456789012345678901234567890123456789.1e10; + +-- string +select "Hello Peter!", 'hello lee!'; +-- multi string +select 'hello' 'world', 'hello' " " 'lee'; +-- single quote within double quotes +select "hello 'peter'"; +select 'pattern%', 'no-pattern\%', 'pattern\\%', 'pattern\\\%'; +select '\'', '"', '\n', '\r', '\t', 'Z'; +-- "Hello!" 
in octals +select '\110\145\154\154\157\041'; +-- "World :)" in unicode +select '\u0057\u006F\u0072\u006C\u0064\u0020\u003A\u0029'; + +-- date +select dAte '2016-03-12'; +-- invalid date +select date 'mar 11 2016'; + +-- timestamp +select tImEstAmp '2016-03-11 20:54:00.000'; +-- invalid timestamp +select timestamp '2016-33-11 20:54:00.000'; + +-- interval +select interval 13.123456789 seconds, interval -13.123456789 second; +select interval 1 year 2 month 3 week 4 day 5 hour 6 minute 7 seconds 8 millisecond, 9 microsecond; +-- ns is not supported +select interval 10 nanoseconds; + +-- unsupported data type +select GEO '(10,-6)'; http://git-wip-us.apache.org/repos/asf/spark/blob/cf936782/sql/core/src/test/resources/sql-tests/inputs/number-format.sql -- diff --git a/sql/core/src/test/resources/sql-tests/inputs/number-format.sql b/sql/core/src/test/resources/sql-tests/inputs/number-format.sql deleted file mode 100644 index a32d068..000 --- a/sql/core/src/test/resources/sql-tests/inputs/number-format.sql +++ /dev/null @@ -1,16 +0,0 @@ --- Verifies how we parse numbers - --- parse as ints -select 1, -1; - --- parse as longs (Int.MaxValue + 1, and Int.MinValue - 1) -select 2147483648, -2147483649; - --- parse long min and max value -select 9223372036854775807, -9223372036854775808; - --- parse as decimals (Long.MaxValue + 1, and Long.MinValue - 1) -selec
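Outside the test harness, the same literal behaviors the new file pins down can be checked in spark-shell (where `spark` is predefined); for instance, suffixed integral literals parse to the narrowest matching type, and values one past the long range fall back to decimal:

```scala
// tinyint / smallint / bigint suffixes
spark.sql("select 127Y, 32767S, 9223372036854775807L").printSchema()

// Long.MaxValue + 1 and Long.MinValue - 1 parse as decimals rather than failing
spark.sql("select 9223372036854775808, -9223372036854775809").printSchema()

// Out-of-range suffixed literals raise a parse error, which the .sql.out golden file records:
// spark.sql("select 128Y")   // throws a parse exception
```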
spark git commit: [SPARK-17021][SQL] simplify the constructor parameters of QuantileSummaries
Repository: spark Updated Branches: refs/heads/master 0f72e4f04 -> acaf2a81a [SPARK-17021][SQL] simplify the constructor parameters of QuantileSummaries ## What changes were proposed in this pull request? 1. `sampled` doesn't need to be `ArrayBuffer`, we never update it, but assign new value 2. `count` doesn't need to be `var`, we never mutate it. 3. `headSampled` doesn't need to be in constructor, we never pass a non-empty `headSampled` to constructor ## How was this patch tested? existing tests. Author: Wenchen Fan Closes #14603 from cloud-fan/simply. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/acaf2a81 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/acaf2a81 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/acaf2a81 Branch: refs/heads/master Commit: acaf2a81ad5238fd1bc81e7be2c328f40c07e755 Parents: 0f72e4f Author: Wenchen Fan Authored: Thu Aug 11 11:02:11 2016 -0700 Committer: Yin Huai Committed: Thu Aug 11 11:02:11 2016 -0700 -- .../sql/execution/stat/StatFunctions.scala | 21 ++-- 1 file changed, 11 insertions(+), 10 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/acaf2a81/sql/core/src/main/scala/org/apache/spark/sql/execution/stat/StatFunctions.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/stat/StatFunctions.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/stat/StatFunctions.scala index 50eecb4..7c58c48 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/stat/StatFunctions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/stat/StatFunctions.scala @@ -114,14 +114,15 @@ object StatFunctions extends Logging { * See the G-K article for more details. * @param count the count of all the elements *inserted in the sampled buffer* * (excluding the head buffer) - * @param headSampled a buffer of latest samples seen so far */ class QuantileSummaries( val compressThreshold: Int, val relativeError: Double, - val sampled: ArrayBuffer[Stats] = ArrayBuffer.empty, - private[stat] var count: Long = 0L, - val headSampled: ArrayBuffer[Double] = ArrayBuffer.empty) extends Serializable { + val sampled: Array[Stats] = Array.empty, + val count: Long = 0L) extends Serializable { + +// a buffer of latest samples seen so far +private val headSampled: ArrayBuffer[Double] = ArrayBuffer.empty import QuantileSummaries._ @@ -186,7 +187,7 @@ object StatFunctions extends Logging { newSamples.append(sampled(sampleIdx)) sampleIdx += 1 } - new QuantileSummaries(compressThreshold, relativeError, newSamples, currentCount) + new QuantileSummaries(compressThreshold, relativeError, newSamples.toArray, currentCount) } /** @@ -207,7 +208,7 @@ object StatFunctions extends Logging { } private def shallowCopy: QuantileSummaries = { - new QuantileSummaries(compressThreshold, relativeError, sampled, count, headSampled) + new QuantileSummaries(compressThreshold, relativeError, sampled, count) } /** @@ -305,11 +306,11 @@ object StatFunctions extends Logging { private def compressImmut( currentSamples: IndexedSeq[Stats], -mergeThreshold: Double): ArrayBuffer[Stats] = { - val res: ArrayBuffer[Stats] = ArrayBuffer.empty +mergeThreshold: Double): Array[Stats] = { if (currentSamples.isEmpty) { -return res +return Array.empty[Stats] } + val res: ArrayBuffer[Stats] = ArrayBuffer.empty // Start for the last element, which is always part of the set. // The head contains the current new head, that may be merged with the current element. 
var head = currentSamples.last @@ -332,7 +333,7 @@ object StatFunctions extends Logging { res.prepend(head) // If necessary, add the minimum element: res.prepend(currentSamples.head) - res + res.toArray } } - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
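Putting the three points together, the resulting constructor shape looks roughly like the sketch below (the `Stats` definition here is assumed for self-containment; the real one lives in `StatFunctions`):

```scala
import scala.collection.mutable.ArrayBuffer

// Assumed shape of the per-sample record.
case class Stats(value: Double, g: Int, delta: Int)

class QuantileSummaries(
    val compressThreshold: Int,
    val relativeError: Double,
    val sampled: Array[Stats] = Array.empty,  // never mutated in place, only replaced wholesale
    val count: Long = 0L) extends Serializable {

  // Latest samples seen so far; purely internal state, so no longer a constructor parameter.
  private val headSampled: ArrayBuffer[Double] = ArrayBuffer.empty
}
```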
spark git commit: [SPARK-17015][SQL] group-by/order-by ordinal and arithmetic tests
Repository: spark Updated Branches: refs/heads/branch-2.0 33a213f33 -> 6bf20cd94 [SPARK-17015][SQL] group-by/order-by ordinal and arithmetic tests This patch adds three test files: 1. arithmetic.sql.out 2. order-by-ordinal.sql 3. group-by-ordinal.sql This includes https://github.com/apache/spark/pull/14594. This is a test case change. Author: petermaxlee Closes #14595 from petermaxlee/SPARK-17015. (cherry picked from commit a7b02db457d5fc663ce6a1ef01bf04689870e6b4) Signed-off-by: Reynold Xin Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/6bf20cd9 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/6bf20cd9 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/6bf20cd9 Branch: refs/heads/branch-2.0 Commit: 6bf20cd9460fd27c3e1e434b1cf31a3778ec3443 Parents: 33a213f Author: petermaxlee Authored: Thu Aug 11 01:43:08 2016 -0700 Committer: Reynold Xin Committed: Thu Aug 11 10:50:52 2016 -0700 -- .../spark/sql/catalyst/analysis/Analyzer.scala | 24 +- .../resources/sql-tests/inputs/arithmetic.sql | 26 +++ .../sql-tests/inputs/group-by-ordinal.sql | 50 + .../sql-tests/inputs/order-by-ordinal.sql | 36 +++ .../sql-tests/results/arithmetic.sql.out| 178 +++ .../sql-tests/results/group-by-ordinal.sql.out | 168 ++ .../sql-tests/results/order-by-ordinal.sql.out | 143 .../org/apache/spark/sql/SQLQuerySuite.scala| 220 --- 8 files changed, 613 insertions(+), 232 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/6bf20cd9/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 660f523..57c3d9a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -547,8 +547,7 @@ class Analyzer( case a: Aggregate if containsStar(a.aggregateExpressions) => if (conf.groupByOrdinal && a.groupingExpressions.exists(IntegerIndex.unapply(_).nonEmpty)) { failAnalysis( -"Group by position: star is not allowed to use in the select list " + - "when using ordinals in group by") +"Star (*) is not allowed in select list when GROUP BY ordinal position is used") } else { a.copy(aggregateExpressions = buildExpandedProjectList(a.aggregateExpressions, a.child)) } @@ -723,9 +722,9 @@ class Analyzer( if (index > 0 && index <= child.output.size) { SortOrder(child.output(index - 1), direction) } else { - throw new UnresolvedException(s, -s"Order/sort By position: $index does not exist " + -s"The Select List is indexed from 1 to ${child.output.size}") + s.failAnalysis( +s"ORDER BY position $index is not in select list " + + s"(valid range is [1, ${child.output.size}])") } case o => o } @@ -737,17 +736,18 @@ class Analyzer( if conf.groupByOrdinal && aggs.forall(_.resolved) && groups.exists(IntegerIndex.unapply(_).nonEmpty) => val newGroups = groups.map { - case IntegerIndex(index) if index > 0 && index <= aggs.size => + case ordinal @ IntegerIndex(index) if index > 0 && index <= aggs.size => aggs(index - 1) match { case e if ResolveAggregateFunctions.containsAggregate(e) => -throw new UnresolvedException(a, - s"Group by position: the '$index'th column in the select contains an " + - s"aggregate function: ${e.sql}. 
Aggregate functions are not allowed in GROUP BY") +ordinal.failAnalysis( + s"GROUP BY position $index is an aggregate function, and " + +"aggregate functions are not allowed in GROUP BY") case o => o } - case IntegerIndex(index) => -throw new UnresolvedException(a, - s"Group by position: '$index' exceeds the size of the select list '${aggs.size}'.") + case ordinal @ IntegerIndex(index) => +ordinal.failAnalysis( + s"GROUP BY position $index is not in select list " + +s"(valid range is [1, ${aggs.size}])") case o => o } Aggregate(newGroups, aggs, child) http://git-wip-us.apache.org/repos/asf/spark/blob/6bf20cd9/sql/core/src/test/
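For context, the ordinal behavior these test files (and the reworded error messages) exercise looks like this in spark-shell; the `testData` view here is a hypothetical stand-in for the suite's fixtures:

```scala
import spark.implicits._

Seq((1, "a"), (2, "b"), (2, "c")).toDF("key", "value").createOrReplaceTempView("testData")

spark.sql("select key, count(value) from testData group by 1")   // ordinal 1 resolves to column `key`
spark.sql("select key, value from testData order by 2 desc")     // ordinal 2 resolves to column `value`

// An ordinal pointing at an aggregate, or outside the select list, fails analysis
// with the clearer messages introduced above, e.g.:
// spark.sql("select key, count(value) from testData group by 2")   // "GROUP BY position 2 is an aggregate function..."
// spark.sql("select key from testData order by 3")                 // "ORDER BY position 3 is not in select list..."
```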
spark git commit: [SPARK-16958] [SQL] Reuse subqueries within the same query
Repository: spark Updated Branches: refs/heads/master 4d496802f -> 0f72e4f04 [SPARK-16958] [SQL] Reuse subqueries within the same query ## What changes were proposed in this pull request? There could be multiple subqueries that generate same results, we could re-use the result instead of running it multiple times. This PR also cleanup up how we run subqueries. For SQL query ```sql select id,(select avg(id) from t) from t where id > (select avg(id) from t) ``` The explain is ``` == Physical Plan == *Project [id#15L, Subquery subquery29 AS scalarsubquery()#35] : +- Subquery subquery29 : +- *HashAggregate(keys=[], functions=[avg(id#15L)]) :+- Exchange SinglePartition : +- *HashAggregate(keys=[], functions=[partial_avg(id#15L)]) : +- *Range (0, 1000, splits=4) +- *Filter (cast(id#15L as double) > Subquery subquery29) : +- Subquery subquery29 : +- *HashAggregate(keys=[], functions=[avg(id#15L)]) :+- Exchange SinglePartition : +- *HashAggregate(keys=[], functions=[partial_avg(id#15L)]) : +- *Range (0, 1000, splits=4) +- *Range (0, 1000, splits=4) ``` The visualized plan: ![reuse-subquery](https://cloud.githubusercontent.com/assets/40902/17573229/e578d93c-5f0d-11e6-8a3c-0150d81d3aed.png) ## How was this patch tested? Existing tests. Author: Davies Liu Closes #14548 from davies/subq. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0f72e4f0 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0f72e4f0 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0f72e4f0 Branch: refs/heads/master Commit: 0f72e4f04b227b9cd5d7ae5958e09b1def49420a Parents: 4d49680 Author: Davies Liu Authored: Thu Aug 11 09:47:19 2016 -0700 Committer: Davies Liu Committed: Thu Aug 11 09:47:19 2016 -0700 -- .../sql/catalyst/expressions/subquery.scala | 7 + .../spark/sql/catalyst/trees/TreeNode.scala | 4 +- .../spark/sql/execution/QueryExecution.scala| 3 +- .../apache/spark/sql/execution/SparkPlan.scala | 34 ++--- .../sql/execution/basicPhysicalOperators.scala | 63 +++- .../apache/spark/sql/execution/subquery.scala | 145 +-- .../spark/sql/execution/ui/SparkPlanGraph.scala | 8 +- 7 files changed, 215 insertions(+), 49 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/0f72e4f0/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/subquery.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/subquery.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/subquery.scala index 08cb6c0..ac44f08 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/subquery.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/subquery.scala @@ -102,6 +102,13 @@ case class PredicateSubquery( override def nullable: Boolean = nullAware override def plan: LogicalPlan = SubqueryAlias(toString, query) override def withNewPlan(plan: LogicalPlan): PredicateSubquery = copy(query = plan) + override def semanticEquals(o: Expression): Boolean = o match { +case p: PredicateSubquery => + query.sameResult(p.query) && nullAware == p.nullAware && +children.length == p.children.length && +children.zip(p.children).forall(p => p._1.semanticEquals(p._2)) +case _ => false + } override def toString: String = s"predicate-subquery#${exprId.id} $conditionString" } http://git-wip-us.apache.org/repos/asf/spark/blob/0f72e4f0/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala -- diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala index 8bce404..24a2dc9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/trees/TreeNode.scala @@ -538,9 +538,9 @@ abstract class TreeNode[BaseType <: TreeNode[BaseType]] extends Product { if (innerChildren.nonEmpty) { innerChildren.init.foreach(_.generateTreeString( -depth + 2, lastChildren :+ false :+ false, builder, verbose)) +depth + 2, lastChildren :+ children.isEmpty :+ false, builder, verbose)) innerChildren.last.generateTreeString( -depth + 2, lastChildren :+ false :+ true, builder, verbose) +depth + 2, lastChildren :+ children.isEmpty :+ true, builder, verbose) } if (children.nonEmpty) { http://git-wip-us.apache.org/repos/asf/spark/blob
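The effect is easy to reproduce with the query from the description; with reuse in place, both occurrences of the scalar subquery in the physical plan point at the same `Subquery` node, so the aggregate over `t` is executed once:

```scala
spark.range(1000).createOrReplaceTempView("t")

val df = spark.sql(
  "select id, (select avg(id) from t) from t where id > (select avg(id) from t)")

df.explain()   // both scalar subqueries reference the same Subquery node, as in the plan above
df.show(5)
```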
spark git commit: Revert "[SPARK-16831][PYTHON] Fixed bug in CrossValidator.avgMetrics"
Repository: spark Updated Branches: refs/heads/branch-1.6 ace458f03 -> b3ecff640 Revert "[SPARK-16831][PYTHON] Fixed bug in CrossValidator.avgMetrics" This reverts commit 92ee6fbf5d5096245d9f1a84cd3a8e66062dd945. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b3ecff64 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b3ecff64 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b3ecff64 Branch: refs/heads/branch-1.6 Commit: b3ecff640f5ecaf07edcdc6f28460f788201 Parents: ace458f Author: Sean Owen Authored: Thu Aug 11 16:59:54 2016 +0100 Committer: Sean Owen Committed: Thu Aug 11 16:59:54 2016 +0100 -- python/pyspark/ml/tuning.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/b3ecff64/python/pyspark/ml/tuning.py -- diff --git a/python/pyspark/ml/tuning.py b/python/pyspark/ml/tuning.py index 8f2cd62..705ee53 100644 --- a/python/pyspark/ml/tuning.py +++ b/python/pyspark/ml/tuning.py @@ -108,8 +108,6 @@ class CrossValidator(Estimator): >>> evaluator = BinaryClassificationEvaluator() >>> cv = CrossValidator(estimator=lr, estimatorParamMaps=grid, evaluator=evaluator) >>> cvModel = cv.fit(dataset) ->>> cvModel.avgMetrics[0] -0.5 >>> evaluator.evaluate(cvModel.transform(dataset)) 0.8333... @@ -241,7 +239,7 @@ class CrossValidator(Estimator): model = est.fit(train, epm[j]) # TODO: duplicate evaluator to take extra params from input metric = eva.evaluate(model.transform(validation, epm[j])) -metrics[j] += metric/nFolds +metrics[j] += metric if eva.isLargerBetter(): bestIndex = np.argmax(metrics) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-15899][SQL] Fix the construction of the file path with hadoop Path for Spark 2.0
Repository: spark Updated Branches: refs/heads/branch-2.0 0ed6236e9 -> 33a213f33 [SPARK-15899][SQL] Fix the construction of the file path with hadoop Path for Spark 2.0 This PR contains the adaptation of https://github.com/apache/spark/pull/13868 for Spark 2.0 ## What changes were proposed in this pull request? Fix the construction of the file path in `SQLConf.scala` and unit tests that rely on this: `SQLConfSuite` and `DDLSuite`. Previous way of construction caused the creation of incorrect path on Windows. ## How was this patch tested? Run unit tests on Windows Author: avulanov Closes #14600 from avulanov/SPARK-15899-file-2.0. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/33a213f3 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/33a213f3 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/33a213f3 Branch: refs/heads/branch-2.0 Commit: 33a213f330bd746fb54783b16ea90c91b23a02a6 Parents: 0ed6236 Author: avulanov Authored: Thu Aug 11 13:07:14 2016 +0100 Committer: Sean Owen Committed: Thu Aug 11 13:07:14 2016 +0100 -- .../org/apache/spark/sql/internal/SQLConf.scala | 7 ++-- .../spark/sql/execution/command/DDLSuite.scala | 44 ++-- .../spark/sql/internal/SQLConfSuite.scala | 4 +- 3 files changed, 30 insertions(+), 25 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/33a213f3/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 1a9bb6a..0666a99 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -23,6 +23,7 @@ import java.util.concurrent.TimeUnit import scala.collection.JavaConverters._ import scala.collection.immutable +import org.apache.hadoop.fs.Path import org.apache.parquet.hadoop.ParquetOutputCommitter import org.apache.spark.internal.Logging @@ -55,7 +56,7 @@ object SQLConf { val WAREHOUSE_PATH = SQLConfigBuilder("spark.sql.warehouse.dir") .doc("The default location for managed databases and tables.") .stringConf -.createWithDefault("file:${system:user.dir}/spark-warehouse") +.createWithDefault("${system:user.dir}/spark-warehouse") val OPTIMIZER_MAX_ITERATIONS = SQLConfigBuilder("spark.sql.optimizer.maxIterations") .internal() @@ -691,9 +692,9 @@ private[sql] class SQLConf extends Serializable with CatalystConf with Logging { def variableSubstituteDepth: Int = getConf(VARIABLE_SUBSTITUTE_DEPTH) def warehousePath: String = { -getConf(WAREHOUSE_PATH).replace("${system:user.dir}", System.getProperty("user.dir")) +new Path(getConf(WAREHOUSE_PATH).replace("${system:user.dir}", + System.getProperty("user.dir"))).toString } - override def orderByOrdinal: Boolean = getConf(ORDER_BY_ORDINAL) override def groupByOrdinal: Boolean = getConf(GROUP_BY_ORDINAL) http://git-wip-us.apache.org/repos/asf/spark/blob/33a213f3/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala -- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala index 8ca4722..d70cae7 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala @@ -111,10 +111,6 @@ class DDLSuite extends QueryTest with 
SharedSQLContext with BeforeAndAfterEach { catalog.createPartitions(tableName, Seq(part), ignoreIfExists = false) } - private def appendTrailingSlash(path: String): String = { -if (!path.endsWith(File.separator)) path + File.separator else path - } - test("the qualified path of a database is stored in the catalog") { val catalog = spark.sessionState.catalog @@ -122,18 +118,19 @@ class DDLSuite extends QueryTest with SharedSQLContext with BeforeAndAfterEach { val path = tmpDir.toString // The generated temp path is not qualified. assert(!path.startsWith("file:/")) - sql(s"CREATE DATABASE db1 LOCATION '$path'") + val uri = tmpDir.toURI + sql(s"CREATE DATABASE db1 LOCATION '$uri'") val pathInCatalog = new Path(catalog.getDatabaseMetadata("db1").locationUri).toUri assert("file" === pathInCatalog.getScheme) - val expectedPath = if (path.endsWith(File.separator)) path.dropRight(1) else path - assert(expectedPath === pathInCatalog.getPath) + val ex
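The substantive change in `SQLConf` is small: route the substituted warehouse directory through `org.apache.hadoop.fs.Path` so the resulting string is well formed on Windows as well as Unix. A standalone sketch of the same transformation:

```scala
import org.apache.hadoop.fs.Path

// "${system:user.dir}" is the placeholder used by the default value of spark.sql.warehouse.dir.
val raw = "${system:user.dir}/spark-warehouse"
  .replace("${system:user.dir}", System.getProperty("user.dir"))

// Wrapping in a hadoop Path normalizes separators, fixing the malformed paths seen on Windows.
val warehousePath = new Path(raw).toString
println(warehousePath)
```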
spark git commit: [SPARK-16952] don't lookup spark home directory when executor uri is set
Repository: spark Updated Branches: refs/heads/master 7186e8c31 -> 4d496802f [SPARK-16952] don't lookup spark home directory when executor uri is set ## What changes were proposed in this pull request? remove requirement to set spark.mesos.executor.home when spark.executor.uri is used ## How was this patch tested? unit tests Author: Michael Gummelt Closes #14552 from mgummelt/fix-spark-home. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4d496802 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4d496802 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4d496802 Branch: refs/heads/master Commit: 4d496802f592dca96dada73b24afc93c668a7f26 Parents: 7186e8c Author: Michael Gummelt Authored: Thu Aug 11 11:36:20 2016 +0100 Committer: Sean Owen Committed: Thu Aug 11 11:36:20 2016 +0100 -- .../MesosCoarseGrainedSchedulerBackend.scala| 10 - ...esosCoarseGrainedSchedulerBackendSuite.scala | 23 ++-- 2 files changed, 26 insertions(+), 7 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/4d496802/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackend.scala -- diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackend.scala index 0933a03..4a88824 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackend.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackend.scala @@ -163,11 +163,6 @@ private[spark] class MesosCoarseGrainedSchedulerBackend( } def createCommand(offer: Offer, numCores: Int, taskId: String): CommandInfo = { -val executorSparkHome = conf.getOption("spark.mesos.executor.home") - .orElse(sc.getSparkHome()) - .getOrElse { -throw new SparkException("Executor Spark home `spark.mesos.executor.home` is not set!") - } val environment = Environment.newBuilder() val extraClassPath = conf.getOption("spark.executor.extraClassPath") extraClassPath.foreach { cp => @@ -201,6 +196,11 @@ private[spark] class MesosCoarseGrainedSchedulerBackend( .orElse(Option(System.getenv("SPARK_EXECUTOR_URI"))) if (uri.isEmpty) { + val executorSparkHome = conf.getOption("spark.mesos.executor.home") +.orElse(sc.getSparkHome()) +.getOrElse { + throw new SparkException("Executor Spark home `spark.mesos.executor.home` is not set!") +} val runScript = new File(executorSparkHome, "./bin/spark-class").getPath command.setValue( "%s \"%s\" org.apache.spark.executor.CoarseGrainedExecutorBackend" http://git-wip-us.apache.org/repos/asf/spark/blob/4d496802/core/src/test/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackendSuite.scala -- diff --git a/core/src/test/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackendSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackendSuite.scala index 0e66979..26a3ad4 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackendSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/cluster/mesos/MesosCoarseGrainedSchedulerBackendSuite.scala @@ -370,6 +370,21 @@ class MesosCoarseGrainedSchedulerBackendSuite extends SparkFunSuite verify(driverEndpoint, never()).askWithRetry(isA(classOf[RemoveExecutor]))(any[ClassTag[_]]) } + test("mesos supports 
spark.executor.uri") { +val url = "spark.spark.spark.com" +setBackend(Map( + "spark.executor.uri" -> url +), false) + +val (mem, cpu) = (backend.executorMemory(sc), 4) + +val offer1 = createOffer("o1", "s1", mem, cpu) +backend.resourceOffers(driver, List(offer1).asJava) + +val launchedTasks = verifyTaskLaunched(driver, "o1") +assert(launchedTasks.head.getCommand.getUrisList.asScala(0).getValue == url) + } + private def verifyDeclinedOffer(driver: SchedulerDriver, offerId: OfferID, filter: Boolean = false): Unit = { @@ -435,13 +450,17 @@ class MesosCoarseGrainedSchedulerBackendSuite extends SparkFunSuite backend } - private def setBackend(sparkConfVars: Map[String, String] = null) { + private def setBackend(sparkConfVars: Map[String, String] = null, + setHome: Boolean = true) { sparkConf = (new SparkConf) .setMaster("local[*]") .setAppName("test-mesos-dynamic-alloc") -
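In practice this means a Mesos job can be configured with just the executor URI; `spark.mesos.executor.home` (or a local Spark home) is only needed when no URI is set. The master URL and URI below are placeholders:

```scala
import org.apache.spark.SparkConf

val conf = new SparkConf()
  .setMaster("mesos://mesos-master.example.com:5050")                          // placeholder master URL
  .setAppName("uri-only-example")
  .set("spark.executor.uri", "http://repo.example.com/spark-2.0.0-bin.tgz")    // placeholder URI
// No spark.mesos.executor.home required: executors unpack the distribution fetched from the URI.
```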
spark git commit: [SPARK-16886][EXAMPLES][DOC] Fix some examples to be consistent and indentation in documentation
Repository: spark Updated Branches: refs/heads/master a45fefd17 -> 7186e8c31 [SPARK-16886][EXAMPLES][DOC] Fix some examples to be consistent and indentation in documentation ## What changes were proposed in this pull request? Originally this PR was based on #14491 but I realised that fixing examples are more sensible rather than comments. This PR fixes three things below: - Fix two wrong examples in `structured-streaming-programming-guide.md`. Loading via `read.load(..)` without `as` will be `Dataset` not `Dataset` in Java. - Fix indentation across `structured-streaming-programming-guide.md`. Python has 4 spaces and Scala and Java have double spaces. These are inconsistent across the examples. - Fix `StructuredNetworkWordCountWindowed` and `StructuredNetworkWordCount` in Java and Scala to initially load `DataFrame` and `Dataset` to be consistent with the comments and some examples in `structured-streaming-programming-guide.md` and to match Scala and Java to Python one (Python one loads it as `DataFrame` initially). ## How was this patch tested? N/A Closes https://github.com/apache/spark/pull/14491 Author: hyukjinkwon Author: Ganesh Chand Closes #14564 from HyukjinKwon/SPARK-16886. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7186e8c3 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7186e8c3 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7186e8c3 Branch: refs/heads/master Commit: 7186e8c3180b7f38250cf2f2de791472bf5325a5 Parents: a45fefd Author: hyukjinkwon Authored: Thu Aug 11 11:31:52 2016 +0100 Committer: Sean Owen Committed: Thu Aug 11 11:31:52 2016 +0100 -- docs/structured-streaming-programming-guide.md | 202 +-- .../JavaStructuredNetworkWordCount.java | 6 +- .../JavaStructuredNetworkWordCountWindowed.java | 30 +-- .../streaming/StructuredNetworkWordCount.scala | 4 +- .../StructuredNetworkWordCountWindowed.scala| 4 +- 5 files changed, 124 insertions(+), 122 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/7186e8c3/docs/structured-streaming-programming-guide.md -- diff --git a/docs/structured-streaming-programming-guide.md b/docs/structured-streaming-programming-guide.md index 8c14c3d..99d50e5 100644 --- a/docs/structured-streaming-programming-guide.md +++ b/docs/structured-streaming-programming-guide.md @@ -46,9 +46,9 @@ import java.util.Arrays; import java.util.Iterator; SparkSession spark = SparkSession -.builder() -.appName("JavaStructuredNetworkWordCount") -.getOrCreate(); + .builder() + .appName("JavaStructuredNetworkWordCount") + .getOrCreate(); {% endhighlight %} @@ -95,7 +95,7 @@ This `lines` DataFrame represents an unbounded table containing the streaming te {% highlight java %} // Create DataFrame representing the stream of input lines from connection to localhost: -Dataset lines = spark +Dataset lines = spark .readStream() .format("socket") .option("host", "localhost") @@ -104,14 +104,14 @@ Dataset lines = spark // Split the lines into words Dataset words = lines -.as(Encoders.STRING()) -.flatMap( -new FlatMapFunction() { - @Override - public Iterator call(String x) { -return Arrays.asList(x.split(" ")).iterator(); - } -}, Encoders.STRING()); + .as(Encoders.STRING()) + .flatMap( +new FlatMapFunction() { + @Override + public Iterator call(String x) { +return Arrays.asList(x.split(" ")).iterator(); + } +}, Encoders.STRING()); // Generate running word count Dataset wordCounts = words.groupBy("value").count(); @@ -125,11 +125,11 @@ This `lines` DataFrame represents an 
unbounded table containing the streaming te {% highlight python %} # Create DataFrame representing the stream of input lines from connection to localhost: lines = spark\ - .readStream\ - .format('socket')\ - .option('host', 'localhost')\ - .option('port', )\ - .load() +.readStream\ +.format('socket')\ +.option('host', 'localhost')\ +.option('port', )\ +.load() # Split the lines into words words = lines.select( @@ -434,11 +434,11 @@ val spark: SparkSession = ... // Read text from socket val socketDF = spark -.readStream -.format("socket") -.option("host", "localhost") -.option("port", ) -.load() + .readStream + .format("socket") + .option("host", "localhost") + .option("port", ) + .load() socketDF.isStreaming// Returns True for DataFrames that have streaming sources @@ -447,10 +447,10 @@ socketDF.printSchema // Read all the csv files written atomically in a directory val userSchema = new StructType().add("
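The `Dataset<Row>` point in the first fix applies to Scala as well: `readStream...load()` returns an untyped `DataFrame`, and the explicit `.as[String]` is what produces the `Dataset[String]` that `flatMap` over lines requires. A minimal spark-shell sketch (host and port are example values):

```scala
import spark.implicits._

val lines = spark.readStream
  .format("socket")
  .option("host", "localhost")
  .option("port", 9999)          // example port
  .load()                        // DataFrame, i.e. Dataset[Row]

val words = lines.as[String].flatMap(_.split(" "))   // Dataset[String]
val wordCounts = words.groupBy("value").count()
```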
spark git commit: [SPARK-16941] Use concurrentHashMap instead of scala Map in SparkSQLOperationManager.
Repository: spark Updated Branches: refs/heads/master 8a6b7037b -> a45fefd17 [SPARK-16941] Use concurrentHashMap instead of scala Map in SparkSQLOperationManager. ## What changes were proposed in this pull request? ThriftServer will have some thread-safe problem in **SparkSQLOperationManager**. Add a SynchronizedMap trait for the maps in it to avoid this problem. Details in [SPARK-16941](https://issues.apache.org/jira/browse/SPARK-16941) ## How was this patch tested? NA Author: huangzhaowei Closes #14534 from SaintBacchus/SPARK-16941. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a45fefd1 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a45fefd1 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a45fefd1 Branch: refs/heads/master Commit: a45fefd17ec4a499b988a2f9931ce397918d3bef Parents: 8a6b703 Author: huangzhaowei Authored: Thu Aug 11 11:28:28 2016 +0100 Committer: Sean Owen Committed: Thu Aug 11 11:28:28 2016 +0100 -- .../thriftserver/SparkExecuteStatementOperation.scala| 9 + .../sql/hive/thriftserver/SparkSQLSessionManager.scala | 4 ++-- .../thriftserver/server/SparkSQLOperationManager.scala | 11 ++- 3 files changed, 13 insertions(+), 11 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/a45fefd1/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala -- diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala index b2717ec..e555ebd 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala @@ -23,7 +23,7 @@ import java.util.{Arrays, Map => JMap, UUID} import java.util.concurrent.RejectedExecutionException import scala.collection.JavaConverters._ -import scala.collection.mutable.{ArrayBuffer, Map => SMap} +import scala.collection.mutable.ArrayBuffer import scala.util.control.NonFatal import org.apache.hadoop.hive.metastore.api.FieldSchema @@ -45,7 +45,7 @@ private[hive] class SparkExecuteStatementOperation( statement: String, confOverlay: JMap[String, String], runInBackground: Boolean = true) -(sqlContext: SQLContext, sessionToActivePool: SMap[SessionHandle, String]) +(sqlContext: SQLContext, sessionToActivePool: JMap[SessionHandle, String]) extends ExecuteStatementOperation(parentSession, statement, confOverlay, runInBackground) with Logging { @@ -215,7 +215,8 @@ private[hive] class SparkExecuteStatementOperation( statementId, parentSession.getUsername) sqlContext.sparkContext.setJobGroup(statementId, statement) -sessionToActivePool.get(parentSession.getSessionHandle).foreach { pool => +val pool = sessionToActivePool.get(parentSession.getSessionHandle) +if (pool != null) { sqlContext.sparkContext.setLocalProperty("spark.scheduler.pool", pool) } try { @@ -223,7 +224,7 @@ private[hive] class SparkExecuteStatementOperation( logDebug(result.queryExecution.toString()) result.queryExecution.logical match { case SetCommand(Some((SQLConf.THRIFTSERVER_POOL.key, Some(value => - sessionToActivePool(parentSession.getSessionHandle) = value + sessionToActivePool.put(parentSession.getSessionHandle, value) logInfo(s"Setting spark.scheduler.pool=$value for future statements in this 
session.") case _ => } http://git-wip-us.apache.org/repos/asf/spark/blob/a45fefd1/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLSessionManager.scala -- diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLSessionManager.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLSessionManager.scala index 1e4c479..6a5117a 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLSessionManager.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkSQLSessionManager.scala @@ -79,14 +79,14 @@ private[hive] class SparkSQLSessionManager(hiveServer: HiveServer2, sqlContext: sqlContext.newSession() } ctx.setConf("spark.sql.hive.version", HiveUtils.hiveExecutionVersion) -sparkSqlOperationManager.sessionToContexts += sessionHandle -> ctx +
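The thread-safety fix amounts to replacing the unsynchronized Scala `Map` with a `java.util.concurrent.ConcurrentHashMap`, which is also why the diff swaps `Option` handling for `null` checks. A stripped-down sketch, with a plain `String` session key standing in for `SessionHandle`:

```scala
import java.util.concurrent.ConcurrentHashMap

// Session handle -> scheduler pool name; safe to read and update from many handler threads.
val sessionToActivePool = new ConcurrentHashMap[String, String]()

def onStatement(sessionId: String, poolFromSetCommand: Option[String]): Unit = {
  poolFromSetCommand.foreach(pool => sessionToActivePool.put(sessionId, pool))
  val pool = sessionToActivePool.get(sessionId)   // returns null when absent, hence the null check
  if (pool != null) {
    println(s"Setting spark.scheduler.pool=$pool for future statements in this session.")
  }
}
```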
spark git commit: Correct example value for spark.ssl.YYY.XXX settings
Repository: spark Updated Branches: refs/heads/branch-2.0 4b434e7da -> 0ed6236e9 Correct example value for spark.ssl.YYY.XXX settings Docs adjustment to: - link to other relevant section of docs - correct statement about the only value when actually other values are supported Author: Andrew Ash Closes #14581 from ash211/patch-10. (cherry picked from commit 8a6b7037bb058d00cc767895c3292509576ea2f9) Signed-off-by: Sean Owen Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0ed6236e Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0ed6236e Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0ed6236e Branch: refs/heads/branch-2.0 Commit: 0ed6236e94318ae0b56363ee1aef4a5577eeebd3 Parents: 4b434e7 Author: Andrew Ash Authored: Thu Aug 11 11:26:57 2016 +0100 Committer: Sean Owen Committed: Thu Aug 11 11:27:07 2016 +0100 -- docs/configuration.md | 6 -- 1 file changed, 4 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/0ed6236e/docs/configuration.md -- diff --git a/docs/configuration.md b/docs/configuration.md index 8facd0e..500a6da 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -1449,8 +1449,10 @@ Apart from these, the following properties are also available, and may be useful the properties must be overwritten in the protocol-specific namespace. Use spark.ssl.YYY.XXX settings to overwrite the global configuration for -particular protocol denoted by YYY. Currently YYY can be -only fs for file server. +particular protocol denoted by YYY. Example values for YYY +include fs, ui, standalone, and +historyServer. See SSL +Configuration for details on hierarchical SSL configuration for services. - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: Correct example value for spark.ssl.YYY.XXX settings
Repository: spark Updated Branches: refs/heads/master a7b02db45 -> 8a6b7037b Correct example value for spark.ssl.YYY.XXX settings Docs adjustment to: - link to other relevant section of docs - correct statement about the only value when actually other values are supported Author: Andrew Ash Closes #14581 from ash211/patch-10. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/8a6b7037 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/8a6b7037 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/8a6b7037 Branch: refs/heads/master Commit: 8a6b7037bb058d00cc767895c3292509576ea2f9 Parents: a7b02db Author: Andrew Ash Authored: Thu Aug 11 11:26:57 2016 +0100 Committer: Sean Owen Committed: Thu Aug 11 11:26:57 2016 +0100 -- docs/configuration.md | 6 -- 1 file changed, 4 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/8a6b7037/docs/configuration.md -- diff --git a/docs/configuration.md b/docs/configuration.md index 4569bed..e33094b 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -1456,8 +1456,10 @@ Apart from these, the following properties are also available, and may be useful the properties must be overwritten in the protocol-specific namespace. Use spark.ssl.YYY.XXX settings to overwrite the global configuration for -particular protocol denoted by YYY. Currently YYY can be -only fs for file server. +particular protocol denoted by YYY. Example values for YYY +include fs, ui, standalone, and +historyServer. See SSL +Configuration for details on hierarchical SSL configuration for services. - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
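In configuration terms, the global `spark.ssl.*` settings act as defaults that any of the listed namespaces can override; a sketch with placeholder paths:

```scala
import org.apache.spark.SparkConf

val conf = new SparkConf()
  .set("spark.ssl.enabled", "true")
  .set("spark.ssl.keyStore", "/etc/spark/global-keystore.jks")   // global default
  .set("spark.ssl.ui.keyStore", "/etc/spark/ui-keystore.jks")    // override for the web UI only
  .set("spark.ssl.historyServer.enabled", "false")               // disable SSL for the history server only
```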
spark git commit: [SPARK-17011][SQL] Support testing exceptions in SQLQueryTestSuite
Repository: spark Updated Branches: refs/heads/branch-2.0 ea8a198b9 -> 4b434e7da [SPARK-17011][SQL] Support testing exceptions in SQLQueryTestSuite ## What changes were proposed in this pull request? This patch adds exception testing to SQLQueryTestSuite. When there is an exception in query execution, the query result contains the the exception class along with the exception message. As part of this, I moved some additional test cases for limit from SQLQuerySuite over to SQLQueryTestSuite. ## How was this patch tested? This is a test harness change. Author: petermaxlee Closes #14592 from petermaxlee/SPARK-17011. (cherry picked from commit 0db373aaf87991207a7a8a09853b6fa602f0f45b) Signed-off-by: Wenchen Fan Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4b434e7d Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4b434e7d Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4b434e7d Branch: refs/heads/branch-2.0 Commit: 4b434e7dadffd83fe701668a23f0ece03e3f08bb Parents: ea8a198 Author: petermaxlee Authored: Wed Aug 10 23:22:14 2016 -0700 Committer: Wenchen Fan Committed: Thu Aug 11 16:51:37 2016 +0800 -- .../test/resources/sql-tests/inputs/limit.sql | 20 + .../sql-tests/inputs/number-format.sql | 7 +- .../sql-tests/results/datetime.sql.out | 2 +- .../resources/sql-tests/results/having.sql.out | 2 +- .../resources/sql-tests/results/limit.sql.out | 83 .../sql-tests/results/natural-join.sql.out | 2 +- .../sql-tests/results/number-format.sql.out | 22 -- .../org/apache/spark/sql/SQLQuerySuite.scala| 50 .../apache/spark/sql/SQLQueryTestSuite.scala| 41 -- 9 files changed, 161 insertions(+), 68 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/4b434e7d/sql/core/src/test/resources/sql-tests/inputs/limit.sql -- diff --git a/sql/core/src/test/resources/sql-tests/inputs/limit.sql b/sql/core/src/test/resources/sql-tests/inputs/limit.sql new file mode 100644 index 000..892a1bb --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/limit.sql @@ -0,0 +1,20 @@ + +-- limit on various data types +select * from testdata limit 2; +select * from arraydata limit 2; +select * from mapdata limit 2; + +-- foldable non-literal in limit +select * from testdata limit 2 + 1; + +select * from testdata limit CAST(1 AS int); + +-- limit must be non-negative +select * from testdata limit -1; + +-- limit must be foldable +select * from testdata limit key > 3; + +-- limit must be integer +select * from testdata limit true; +select * from testdata limit 'a'; http://git-wip-us.apache.org/repos/asf/spark/blob/4b434e7d/sql/core/src/test/resources/sql-tests/inputs/number-format.sql -- diff --git a/sql/core/src/test/resources/sql-tests/inputs/number-format.sql b/sql/core/src/test/resources/sql-tests/inputs/number-format.sql index 60076a8..a32d068 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/number-format.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/number-format.sql @@ -3,10 +3,13 @@ -- parse as ints select 1, -1; --- parse as longs +-- parse as longs (Int.MaxValue + 1, and Int.MinValue - 1) select 2147483648, -2147483649; --- parse as decimals +-- parse long min and max value +select 9223372036854775807, -9223372036854775808; + +-- parse as decimals (Long.MaxValue + 1, and Long.MinValue - 1) select 9223372036854775808, -9223372036854775809; -- various floating point (decimal) formats http://git-wip-us.apache.org/repos/asf/spark/blob/4b434e7d/sql/core/src/test/resources/sql-tests/results/datetime.sql.out -- 
diff --git a/sql/core/src/test/resources/sql-tests/results/datetime.sql.out b/sql/core/src/test/resources/sql-tests/results/datetime.sql.out index 5174657..032e425 100644 --- a/sql/core/src/test/resources/sql-tests/results/datetime.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/datetime.sql.out @@ -1,4 +1,4 @@ --- Automatically generated by org.apache.spark.sql.SQLQueryTestSuite +-- Automatically generated by SQLQueryTestSuite -- Number of queries: 1 http://git-wip-us.apache.org/repos/asf/spark/blob/4b434e7d/sql/core/src/test/resources/sql-tests/results/having.sql.out -- diff --git a/sql/core/src/test/resources/sql-tests/results/having.sql.out b/sql/core/src/test/resources/sql-tests/results/having.sql.out index 0bc8be6..e092383 100644 --- a/sql/core/src/test/resources/sql-tests/results/having.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/havin
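The exception handling this commit describes can be sketched in a few lines: when a statement from an input file fails, the harness catches the error and renders the exception class and message as the query "output" (with an empty schema) so it can be diffed against the golden file like any other result. This is a minimal illustration assuming a SparkSession is passed in; it is not the literal SQLQueryTestSuite code:

```scala
import scala.util.control.NonFatal

import org.apache.spark.sql.SparkSession

// Sketch: execute one statement and render either its rows or its failure as text.
def runOrCaptureError(spark: SparkSession, sql: String): (String, Seq[String]) = {
  try {
    val df = spark.sql(sql)
    (df.schema.catalogString, df.collect().map(_.toString).toSeq)
  } catch {
    case NonFatal(e) =>
      // On failure the "output" becomes the exception class plus its message, so a
      // case such as `select * from testdata limit -1` can be asserted on in
      // limit.sql.out just like an ordinary result set.
      ("struct<>", Seq(e.getClass.getName, e.getMessage))
  }
}
```

With this shape, a negative or non-integer limit produces an exception entry in the golden file instead of aborting the whole suite.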
spark git commit: [SPARK-17007][SQL] Move test data files into a test-data folder
Repository: spark Updated Branches: refs/heads/branch-2.0 51b101668 -> ea8a198b9 [SPARK-17007][SQL] Move test data files into a test-data folder ## What changes were proposed in this pull request? This patch moves all the test data files in sql/core/src/test/resources to sql/core/src/test/resources/test-data, so we don't clutter the top level sql/core/src/test/resources. Also deleted sql/core/src/test/resources/old-repeated.parquet since it is no longer used. The change will make it easier to spot sql-tests directory. ## How was this patch tested? This is a test-only change. Author: petermaxlee Closes #14589 from petermaxlee/SPARK-17007. (cherry picked from commit 665e175328130ab3eb0370cdd2a43ed5a7bed1d6) Signed-off-by: Wenchen Fan Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ea8a198b Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ea8a198b Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ea8a198b Branch: refs/heads/branch-2.0 Commit: ea8a198b9838f731458456f369b700815f02198a Parents: 51b1016 Author: petermaxlee Authored: Wed Aug 10 21:26:46 2016 -0700 Committer: Wenchen Fan Committed: Thu Aug 11 16:49:43 2016 +0800 -- .../apache/spark/sql/JavaDataFrameSuite.java| 12 +++ sql/core/src/test/resources/bool.csv| 5 --- .../src/test/resources/cars-alternative.csv | 5 --- .../test/resources/cars-blank-column-name.csv | 3 -- sql/core/src/test/resources/cars-malformed.csv | 6 sql/core/src/test/resources/cars-null.csv | 6 .../test/resources/cars-unbalanced-quotes.csv | 4 --- sql/core/src/test/resources/cars.csv| 7 sql/core/src/test/resources/cars.tsv| 4 --- sql/core/src/test/resources/cars_iso-8859-1.csv | 6 sql/core/src/test/resources/comments.csv| 6 sql/core/src/test/resources/dates.csv | 4 --- .../src/test/resources/dec-in-fixed-len.parquet | Bin 460 -> 0 bytes sql/core/src/test/resources/dec-in-i32.parquet | Bin 420 -> 0 bytes sql/core/src/test/resources/dec-in-i64.parquet | Bin 437 -> 0 bytes sql/core/src/test/resources/decimal.csv | 7 .../src/test/resources/disable_comments.csv | 2 -- sql/core/src/test/resources/empty.csv | 0 .../test/resources/nested-array-struct.parquet | Bin 775 -> 0 bytes sql/core/src/test/resources/numbers.csv | 9 - .../src/test/resources/old-repeated-int.parquet | Bin 389 -> 0 bytes .../test/resources/old-repeated-message.parquet | Bin 600 -> 0 bytes .../src/test/resources/old-repeated.parquet | Bin 432 -> 0 bytes .../parquet-thrift-compat.snappy.parquet| Bin 10550 -> 0 bytes .../resources/proto-repeated-string.parquet | Bin 411 -> 0 bytes .../resources/proto-repeated-struct.parquet | Bin 608 -> 0 bytes .../proto-struct-with-array-many.parquet| Bin 802 -> 0 bytes .../resources/proto-struct-with-array.parquet | Bin 1576 -> 0 bytes sql/core/src/test/resources/simple_sparse.csv | 5 --- sql/core/src/test/resources/test-data/bool.csv | 5 +++ .../resources/test-data/cars-alternative.csv| 5 +++ .../test-data/cars-blank-column-name.csv| 3 ++ .../test/resources/test-data/cars-malformed.csv | 6 .../src/test/resources/test-data/cars-null.csv | 6 .../test-data/cars-unbalanced-quotes.csv| 4 +++ sql/core/src/test/resources/test-data/cars.csv | 7 sql/core/src/test/resources/test-data/cars.tsv | 4 +++ .../resources/test-data/cars_iso-8859-1.csv | 6 .../src/test/resources/test-data/comments.csv | 6 sql/core/src/test/resources/test-data/dates.csv | 4 +++ .../test-data/dec-in-fixed-len.parquet | Bin 0 -> 460 bytes .../test/resources/test-data/dec-in-i32.parquet | Bin 0 -> 420 bytes 
.../test/resources/test-data/dec-in-i64.parquet | Bin 0 -> 437 bytes .../src/test/resources/test-data/decimal.csv| 7 .../resources/test-data/disable_comments.csv| 2 ++ sql/core/src/test/resources/test-data/empty.csv | 0 .../test-data/nested-array-struct.parquet | Bin 0 -> 775 bytes .../src/test/resources/test-data/numbers.csv| 9 + .../test-data/old-repeated-int.parquet | Bin 0 -> 389 bytes .../test-data/old-repeated-message.parquet | Bin 0 -> 600 bytes .../parquet-thrift-compat.snappy.parquet| Bin 0 -> 10550 bytes .../test-data/proto-repeated-string.parquet | Bin 0 -> 411 bytes .../test-data/proto-repeated-struct.parquet | Bin 0 -> 608 bytes .../proto-struct-with-array-many.parquet| Bin 0 -> 802 bytes .../test-data/proto-struct-with-array.parquet | Bin 0 -> 1576 bytes .../test/resources/test-data/simple_sparse.csv | 5 +++ .../text-partitioned/year=2014/data.txt | 1 + .../text-partitioned/year=2015/data.txt | 1 +
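With every fixture now under test-data/, suites load these files through the new prefix. The snippet below is only a hedged illustration of reading one relocated CSV from the test classpath; the path-resolution helper the real suites use may differ:

```scala
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder().master("local[2]").appName("test-data-demo").getOrCreate()

// The cars fixture now lives under test-data/ rather than at the resource root.
val carsPath = Thread.currentThread().getContextClassLoader
  .getResource("test-data/cars.csv").toString

val cars = spark.read.option("header", "true").csv(carsPath)
cars.show()
```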
spark git commit: [SPARK-17008][SPARK-17009][SQL] Normalization and isolation in SQLQueryTestSuite.
Repository: spark Updated Branches: refs/heads/branch-2.0 8611bc205 -> 51b101668 [SPARK-17008][SPARK-17009][SQL] Normalization and isolation in SQLQueryTestSuite. ## What changes were proposed in this pull request? This patch enhances SQLQueryTestSuite in two ways: 1. SPARK-17009: Use a new SparkSession for each test case to provide stronger isolation (e.g. config changes in one test case do not impact another). That said, we do not currently isolate catalog changes. 2. SPARK-17008: Normalize query output using sorting, inspired by HiveComparisonTest. I also ported a few new test cases over from SQLQuerySuite. ## How was this patch tested? This is a test harness update. Author: petermaxlee Closes #14590 from petermaxlee/SPARK-17008. (cherry picked from commit 425c7c2dbd2923094712e1215dd29272fb09cd79) Signed-off-by: Wenchen Fan Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/51b10166 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/51b10166 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/51b10166 Branch: refs/heads/branch-2.0 Commit: 51b1016682a805e06b857a6b1f160a877839dbd5 Parents: 8611bc2 Author: petermaxlee Authored: Wed Aug 10 21:05:32 2016 -0700 Committer: Wenchen Fan Committed: Thu Aug 11 16:46:03 2016 +0800 -- .../resources/sql-tests/inputs/datetime.sql | 4 ++ .../test/resources/sql-tests/inputs/having.sql | 15 + .../resources/sql-tests/inputs/natural-join.sql | 20 ++ .../sql-tests/results/datetime.sql.out | 10 +++ .../resources/sql-tests/results/having.sql.out | 40 .../sql-tests/results/natural-join.sql.out | 64 .../org/apache/spark/sql/SQLQuerySuite.scala| 62 --- .../apache/spark/sql/SQLQueryTestSuite.scala| 30 - 8 files changed, 180 insertions(+), 65 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/51b10166/sql/core/src/test/resources/sql-tests/inputs/datetime.sql -- diff --git a/sql/core/src/test/resources/sql-tests/inputs/datetime.sql b/sql/core/src/test/resources/sql-tests/inputs/datetime.sql new file mode 100644 index 000..3fd1c37 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/datetime.sql @@ -0,0 +1,4 @@ +-- date time functions + +-- [SPARK-16836] current_date and current_timestamp literals +select current_date = current_date(), current_timestamp = current_timestamp(); http://git-wip-us.apache.org/repos/asf/spark/blob/51b10166/sql/core/src/test/resources/sql-tests/inputs/having.sql -- diff --git a/sql/core/src/test/resources/sql-tests/inputs/having.sql b/sql/core/src/test/resources/sql-tests/inputs/having.sql new file mode 100644 index 000..364c022 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/having.sql @@ -0,0 +1,15 @@ +create temporary view hav as select * from values + ("one", 1), + ("two", 2), + ("three", 3), + ("one", 5) + as hav(k, v); + +-- having clause +SELECT k, sum(v) FROM hav GROUP BY k HAVING sum(v) > 2; + +-- having condition contains grouping column +SELECT count(k) FROM hav GROUP BY v + 1 HAVING v + 1 = 2; + +-- SPARK-11032: resolve having correctly +SELECT MIN(t.v) FROM (SELECT * FROM hav WHERE v > 0) t HAVING(COUNT(1) > 0); http://git-wip-us.apache.org/repos/asf/spark/blob/51b10166/sql/core/src/test/resources/sql-tests/inputs/natural-join.sql -- diff --git a/sql/core/src/test/resources/sql-tests/inputs/natural-join.sql b/sql/core/src/test/resources/sql-tests/inputs/natural-join.sql new file mode 100644 index 000..71a5015 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/natural-join.sql @@ -0,0 +1,20 @@ 
+create temporary view nt1 as select * from values + ("one", 1), + ("two", 2), + ("three", 3) + as nt1(k, v1); + +create temporary view nt2 as select * from values + ("one", 1), + ("two", 22), + ("one", 5) + as nt2(k, v2); + + +SELECT * FROM nt1 natural join nt2 where k = "one"; + +SELECT * FROM nt1 natural left join nt2 order by v1, v2; + +SELECT * FROM nt1 natural right join nt2 order by v1, v2; + +SELECT count(*) FROM nt1 natural full outer join nt2; http://git-wip-us.apache.org/repos/asf/spark/blob/51b10166/sql/core/src/test/resources/sql-tests/results/datetime.sql.out -- diff --git a/sql/core/src/test/resources/sql-tests/results/datetime.sql.out b/sql/core/src/test/resources/sql-tests/results/datetime.sql.out new file mode 100644 index 000..5174657 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/datetime.sql.out @@ -0,0 +1,10 @@ +-- Aut
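The two mechanisms this patch introduces can be summarized with a short sketch: a fresh session per test case for isolation, and sorting of the rendered rows for normalization. The method names below are illustrative rather than the actual SQLQueryTestSuite members:

```scala
import org.apache.spark.sql.SparkSession

// SPARK-17009: give each test case its own session so a SET in one .sql file
// cannot leak configuration into the next one (catalog changes are still shared).
def freshSession(base: SparkSession): SparkSession = base.newSession()

// SPARK-17008: when a query has no explicit ordering, sort the rendered rows
// before comparing them with the golden file, so nondeterministic row order
// does not produce spurious diffs (the same trick HiveComparisonTest relies on).
def normalizeOutput(rows: Seq[String], isOrdered: Boolean): Seq[String] =
  if (isOrdered) rows else rows.sorted
```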
spark git commit: [SPARK-16866][SQL] Infrastructure for file-based SQL end-to-end tests
Repository: spark Updated Branches: refs/heads/branch-2.0 1e4013571 -> 8611bc205 [SPARK-16866][SQL] Infrastructure for file-based SQL end-to-end tests ## What changes were proposed in this pull request? This patch introduces SQLQueryTestSuite, a basic framework for end-to-end SQL test cases defined in spark/sql/core/src/test/resources/sql-tests. This is a more standard way to test SQL queries end-to-end in different open source database systems, because it is more manageable to work with files. This is inspired by HiveCompatibilitySuite, but simplified for general Spark SQL tests. Once this is merged, I can work towards porting SQLQuerySuite over, and eventually also move the existing HiveCompatibilitySuite to use this framework. Unlike HiveCompatibilitySuite, SQLQueryTestSuite compares both the output schema and the output data (in string form). When there is a mismatch, the error message looks like the following: ``` [info] - blacklist.sql !!! IGNORED !!! [info] - number-format.sql *** FAILED *** (2 seconds, 405 milliseconds) [info] Expected "...147483648 -214748364[8]", but got "...147483648 -214748364[9]" Result should match for query #1 (SQLQueryTestSuite.scala:171) [info] org.scalatest.exceptions.TestFailedException: [info] at org.scalatest.Assertions$class.newAssertionFailedException(Assertions.scala:495) [info] at org.scalatest.FunSuite.newAssertionFailedException(FunSuite.scala:1555) [info] at org.scalatest.Assertions$class.assertResult(Assertions.scala:1171) ``` ## How was this patch tested? This is a test infrastructure change. Author: petermaxlee Closes #14472 from petermaxlee/SPARK-16866. (cherry picked from commit b9f8a117097bc102e261b68f38a679d16e19f2e2) Signed-off-by: Wenchen Fan Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/8611bc20 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/8611bc20 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/8611bc20 Branch: refs/heads/branch-2.0 Commit: 8611bc2058eb7397c372de39b59934494569623c Parents: 1e40135 Author: petermaxlee Authored: Wed Aug 10 17:17:21 2016 +0800 Committer: Wenchen Fan Committed: Thu Aug 11 16:43:44 2016 +0800 -- .../resources/sql-tests/inputs/blacklist.sql| 4 + .../sql-tests/inputs/number-format.sql | 13 ++ .../sql-tests/results/number-format.sql.out | 34 +++ .../org/apache/spark/sql/SQLQuerySuite.scala| 36 .../apache/spark/sql/SQLQueryTestSuite.scala| 215 +++ .../sql/catalyst/LogicalPlanToSQLSuite.scala| 3 +- 6 files changed, 267 insertions(+), 38 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/8611bc20/sql/core/src/test/resources/sql-tests/inputs/blacklist.sql -- diff --git a/sql/core/src/test/resources/sql-tests/inputs/blacklist.sql b/sql/core/src/test/resources/sql-tests/inputs/blacklist.sql new file mode 100644 index 000..d69f814 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/blacklist.sql @@ -0,0 +1,4 @@ +-- This is a query file that has been blacklisted. +-- It includes a query that should crash Spark. +-- If the test case is run, the whole suite would fail. +some random not working query that should crash Spark. 
http://git-wip-us.apache.org/repos/asf/spark/blob/8611bc20/sql/core/src/test/resources/sql-tests/inputs/number-format.sql -- diff --git a/sql/core/src/test/resources/sql-tests/inputs/number-format.sql b/sql/core/src/test/resources/sql-tests/inputs/number-format.sql new file mode 100644 index 000..60076a8 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/number-format.sql @@ -0,0 +1,13 @@ +-- Verifies how we parse numbers + +-- parse as ints +select 1, -1; + +-- parse as longs +select 2147483648, -2147483649; + +-- parse as decimals +select 9223372036854775808, -9223372036854775809; + +-- various floating point (decimal) formats +select 0.3, -0.8, .5, -.18, 0.; http://git-wip-us.apache.org/repos/asf/spark/blob/8611bc20/sql/core/src/test/resources/sql-tests/results/number-format.sql.out -- diff --git a/sql/core/src/test/resources/sql-tests/results/number-format.sql.out b/sql/core/src/test/resources/sql-tests/results/number-format.sql.out new file mode 100644 index 000..4b800b7 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/number-format.sql.out @@ -0,0 +1,34 @@ +-- Automatically generated by org.apache.spark.sql.SQLQueryTestSuite +-- Number of queries: 4 + + +-- !query 0 +select 1, -1 +-- !query 0 schema +struct<1:int,(-1):int> +-- !query 0 output +1 -1 + + +-- !query 1 +select 2147483648, -214748364
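As a rough sketch of the file-based flow, the suite reads an input file such as number-format.sql, strips comment lines, splits it into statements, and compares each statement's rendered schema and output with the matching results/<name>.sql.out golden file (or regenerates it). The naive splitting below is a simplification under that assumption, not the exact parsing the suite performs:

```scala
import java.io.File
import java.nio.file.Files

// Sketch: read a test input file, drop "--" comment lines, and split it into queries.
def readQueries(inputFile: File): Seq[String] = {
  val text = new String(Files.readAllBytes(inputFile.toPath), "UTF-8")
  val cleaned = text.split("\n").filterNot(_.trim.startsWith("--")).mkString("\n")
  // Naive semicolon split; good enough for simple inputs like number-format.sql.
  cleaned.split(";").map(_.trim).filter(_.nonEmpty).toSeq
}
```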
spark git commit: [SPARK-17015][SQL] group-by/order-by ordinal and arithmetic tests
Repository: spark Updated Branches: refs/heads/master 0db373aaf -> a7b02db45 [SPARK-17015][SQL] group-by/order-by ordinal and arithmetic tests ## What changes were proposed in this pull request? This patch adds three test files: 1. arithmetic.sql.out 2. order-by-ordinal.sql 3. group-by-ordinal.sql This includes https://github.com/apache/spark/pull/14594. ## How was this patch tested? This is a test case change. Author: petermaxlee Closes #14595 from petermaxlee/SPARK-17015. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a7b02db4 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a7b02db4 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a7b02db4 Branch: refs/heads/master Commit: a7b02db457d5fc663ce6a1ef01bf04689870e6b4 Parents: 0db373a Author: petermaxlee Authored: Thu Aug 11 01:43:08 2016 -0700 Committer: Reynold Xin Committed: Thu Aug 11 01:43:08 2016 -0700 -- .../spark/sql/catalyst/analysis/Analyzer.scala | 24 +- .../resources/sql-tests/inputs/arithmetic.sql | 26 +++ .../sql-tests/inputs/group-by-ordinal.sql | 50 + .../sql-tests/inputs/order-by-ordinal.sql | 36 +++ .../sql-tests/results/arithmetic.sql.out| 178 +++ .../sql-tests/results/group-by-ordinal.sql.out | 168 ++ .../sql-tests/results/order-by-ordinal.sql.out | 143 .../org/apache/spark/sql/SQLQuerySuite.scala| 220 --- 8 files changed, 613 insertions(+), 232 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/a7b02db4/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 25202b5..14a2a32 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -547,8 +547,7 @@ class Analyzer( case a: Aggregate if containsStar(a.aggregateExpressions) => if (conf.groupByOrdinal && a.groupingExpressions.exists(IntegerIndex.unapply(_).nonEmpty)) { failAnalysis( -"Group by position: star is not allowed to use in the select list " + - "when using ordinals in group by") +"Star (*) is not allowed in select list when GROUP BY ordinal position is used") } else { a.copy(aggregateExpressions = buildExpandedProjectList(a.aggregateExpressions, a.child)) } @@ -723,9 +722,9 @@ class Analyzer( if (index > 0 && index <= child.output.size) { SortOrder(child.output(index - 1), direction) } else { - throw new UnresolvedException(s, -s"Order/sort By position: $index does not exist " + -s"The Select List is indexed from 1 to ${child.output.size}") + s.failAnalysis( +s"ORDER BY position $index is not in select list " + + s"(valid range is [1, ${child.output.size}])") } case o => o } @@ -737,17 +736,18 @@ class Analyzer( if conf.groupByOrdinal && aggs.forall(_.resolved) && groups.exists(IntegerIndex.unapply(_).nonEmpty) => val newGroups = groups.map { - case IntegerIndex(index) if index > 0 && index <= aggs.size => + case ordinal @ IntegerIndex(index) if index > 0 && index <= aggs.size => aggs(index - 1) match { case e if ResolveAggregateFunctions.containsAggregate(e) => -throw new UnresolvedException(a, - s"Group by position: the '$index'th column in the select contains an " + - s"aggregate function: ${e.sql}. 
Aggregate functions are not allowed in GROUP BY") +ordinal.failAnalysis( + s"GROUP BY position $index is an aggregate function, and " + +"aggregate functions are not allowed in GROUP BY") case o => o } - case IntegerIndex(index) => -throw new UnresolvedException(a, - s"Group by position: '$index' exceeds the size of the select list '${aggs.size}'.") + case ordinal @ IntegerIndex(index) => +ordinal.failAnalysis( + s"GROUP BY position $index is not in select list " + +s"(valid range is [1, ${aggs.size}])") case o => o } Aggregate(newGroups, aggs, child) http://git-wip-us.apache.org/repos/asf/spark/blob/a7b02db4/sql/core/src/test/resources/sql-tests/inpu
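To make the ordinal behaviour and the reworded Analyzer errors concrete, here is a small, hedged example; the table and queries are illustrative and not copied from the actual group-by-ordinal.sql or order-by-ordinal.sql inputs:

```scala
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder().master("local[2]").appName("ordinal-demo").getOrCreate()
import spark.implicits._

Seq(("a", 1), ("a", 2), ("b", 3)).toDF("k", "v").createOrReplaceTempView("t")

// GROUP BY 1 and ORDER BY 2 refer to positions in the select list (k and sum(v)).
spark.sql("SELECT k, sum(v) FROM t GROUP BY 1 ORDER BY 2").show()

// An out-of-range ordinal now fails with the clearer message from this patch, e.g.
// "ORDER BY position 3 is not in select list (valid range is [1, 2])".
// spark.sql("SELECT k, sum(v) FROM t ORDER BY 3").show()
```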