[GitHub] spark pull request #19529: [SPARK-22308] Support alternative unit testing st...
Github user asfgit closed the pull request at: https://github.com/apache/spark/pull/19529
[GitHub] spark pull request #19529: [SPARK-22308] Support alternative unit testing st...
Github user nkronenfeld commented on a diff in the pull request: https://github.com/apache/spark/pull/19529#discussion_r147038647

--- Diff: sql/core/src/test/scala/org/apache/spark/sql/test/SharedSQLContext.scala ---
@@ -17,86 +17,8 @@ package org.apache.spark.sql.test

-import scala.concurrent.duration._
-
-import org.scalatest.BeforeAndAfterEach
-import org.scalatest.concurrent.Eventually
-
-import org.apache.spark.{DebugFilesystem, SparkConf}
-import org.apache.spark.sql.{SparkSession, SQLContext}
-import org.apache.spark.sql.internal.SQLConf
-
-/**
- * Helper trait for SQL test suites where all tests share a single [[TestSparkSession]].
- */
-trait SharedSQLContext extends SQLTestUtils with BeforeAndAfterEach with Eventually {
-
-  protected def sparkConf = {
-    new SparkConf()
-      .set("spark.hadoop.fs.file.impl", classOf[DebugFilesystem].getName)
-      .set("spark.unsafe.exceptionOnMemoryLeak", "true")
-      .set(SQLConf.CODEGEN_FALLBACK.key, "false")
-  }
-
-  /**
-   * The [[TestSparkSession]] to use for all tests in this suite.
-   *
-   * By default, the underlying [[org.apache.spark.SparkContext]] will be run in local
-   * mode with the default test configurations.
-   */
-  private var _spark: TestSparkSession = null
-
-  /**
-   * The [[TestSparkSession]] to use for all tests in this suite.
-   */
-  protected implicit def spark: SparkSession = _spark
-
-  /**
-   * The [[TestSQLContext]] to use for all tests in this suite.
-   */
-  protected implicit def sqlContext: SQLContext = _spark.sqlContext
-
-  protected def createSparkSession: TestSparkSession = {
-    new TestSparkSession(sparkConf)
-  }
-
-  /**
-   * Initialize the [[TestSparkSession]].
-   */
+trait SharedSQLContext extends SQLTestUtils with SharedSparkSession {
--- End diff --

We could... that would better fit the pattern of what we've done now for PlanTest/PlanTestBase and SQLTestUtils/SQLTestUtilsBase. I hesitated in this case just because the two are conceptually such different things, and the idea is that both would actually get used - SharedSQLContext in internal tests, SharedSparkSession in external tests.
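To illustrate the intended split, here is a minimal sketch (suite names and test bodies are hypothetical, not from the PR): internal Spark suites keep the FunSuite-based SharedSQLContext, while an external project mixes SharedSparkSession into whatever ScalaTest style it prefers.

```scala
import org.scalatest.FlatSpec

import org.apache.spark.sql.test.{SharedSQLContext, SharedSparkSession}

// Internal-style suite (hypothetical): FunSuite's test(...) comes in through
// SQLTestUtils/SparkFunSuite, so this only compiles inside Spark's own sql
// test packages, where the private[sql] SQLTestUtils is visible.
class InternalStyleSuite extends SharedSQLContext {
  test("count a small range") {
    assert(spark.range(3).count() == 3)
  }
}

// External-style suite (hypothetical): SharedSparkSession self-types against
// org.scalatest.Suite, so any ScalaTest style works, not just FunSuite.
class ExternalStyleSpec extends FlatSpec with SharedSparkSession {
  "A shared session" should "be usable from a FlatSpec" in {
    assert(spark.range(3).count() == 3)
  }
}
```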
[GitHub] spark pull request #19529: [SPARK-22308] Support alternative unit testing st...
Github user nkronenfeld commented on a diff in the pull request: https://github.com/apache/spark/pull/19529#discussion_r147038487

--- Diff: sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/PlanTest.scala ---
@@ -29,7 +31,14 @@ import org.apache.spark.sql.internal.SQLConf

 /**
  * Provides helper methods for comparing plans.
  */
-trait PlanTest extends SparkFunSuite with PredicateHelper {
+trait PlanTest extends SparkFunSuite with PlanTestBase {
+}
--- End diff --

done
[GitHub] spark pull request #19529: [SPARK-22308] Support alternative unit testing st...
Github user nkronenfeld commented on a diff in the pull request: https://github.com/apache/spark/pull/19529#discussion_r147038504

--- Diff: sql/core/src/test/scala/org/apache/spark/sql/test/SharedSQLContext.scala ---
@@ -17,86 +17,8 @@ package org.apache.spark.sql.test

-import scala.concurrent.duration._
-
-import org.scalatest.BeforeAndAfterEach
-import org.scalatest.concurrent.Eventually
-
-import org.apache.spark.{DebugFilesystem, SparkConf}
-import org.apache.spark.sql.{SparkSession, SQLContext}
-import org.apache.spark.sql.internal.SQLConf
-
-/**
- * Helper trait for SQL test suites where all tests share a single [[TestSparkSession]].
- */
-trait SharedSQLContext extends SQLTestUtils with BeforeAndAfterEach with Eventually {
-
-  protected def sparkConf = {
-    new SparkConf()
-      .set("spark.hadoop.fs.file.impl", classOf[DebugFilesystem].getName)
-      .set("spark.unsafe.exceptionOnMemoryLeak", "true")
-      .set(SQLConf.CODEGEN_FALLBACK.key, "false")
-  }
-
-  /**
-   * The [[TestSparkSession]] to use for all tests in this suite.
-   *
-   * By default, the underlying [[org.apache.spark.SparkContext]] will be run in local
-   * mode with the default test configurations.
-   */
-  private var _spark: TestSparkSession = null
-
-  /**
-   * The [[TestSparkSession]] to use for all tests in this suite.
-   */
-  protected implicit def spark: SparkSession = _spark
-
-  /**
-   * The [[TestSQLContext]] to use for all tests in this suite.
-   */
-  protected implicit def sqlContext: SQLContext = _spark.sqlContext
-
-  protected def createSparkSession: TestSparkSession = {
-    new TestSparkSession(sparkConf)
-  }
-
-  /**
-   * Initialize the [[TestSparkSession]].
-   */
+trait SharedSQLContext extends SQLTestUtils with SharedSparkSession {
   protected override def beforeAll(): Unit = {
-    SparkSession.sqlListener.set(null)
-    if (_spark == null) {
-      _spark = createSparkSession
-    }
-    // Ensure we have initialized the context before calling parent code
     super.beforeAll()
--- End diff --

we don't. Removed.
[GitHub] spark pull request #19529: [SPARK-22308] Support alternative unit testing st...
Github user gatorsmile commented on a diff in the pull request: https://github.com/apache/spark/pull/19529#discussion_r146978824

--- Diff: sql/core/src/test/scala/org/apache/spark/sql/test/SharedSQLContext.scala ---
@@ -17,86 +17,8 @@ package org.apache.spark.sql.test

-import scala.concurrent.duration._
-
-import org.scalatest.BeforeAndAfterEach
-import org.scalatest.concurrent.Eventually
-
-import org.apache.spark.{DebugFilesystem, SparkConf}
-import org.apache.spark.sql.{SparkSession, SQLContext}
-import org.apache.spark.sql.internal.SQLConf
-
-/**
- * Helper trait for SQL test suites where all tests share a single [[TestSparkSession]].
- */
-trait SharedSQLContext extends SQLTestUtils with BeforeAndAfterEach with Eventually {
-
-  protected def sparkConf = {
-    new SparkConf()
-      .set("spark.hadoop.fs.file.impl", classOf[DebugFilesystem].getName)
-      .set("spark.unsafe.exceptionOnMemoryLeak", "true")
-      .set(SQLConf.CODEGEN_FALLBACK.key, "false")
-  }
-
-  /**
-   * The [[TestSparkSession]] to use for all tests in this suite.
-   *
-   * By default, the underlying [[org.apache.spark.SparkContext]] will be run in local
-   * mode with the default test configurations.
-   */
-  private var _spark: TestSparkSession = null
-
-  /**
-   * The [[TestSparkSession]] to use for all tests in this suite.
-   */
-  protected implicit def spark: SparkSession = _spark
-
-  /**
-   * The [[TestSQLContext]] to use for all tests in this suite.
-   */
-  protected implicit def sqlContext: SQLContext = _spark.sqlContext
-
-  protected def createSparkSession: TestSparkSession = {
-    new TestSparkSession(sparkConf)
-  }
-
-  /**
-   * Initialize the [[TestSparkSession]].
-   */
+trait SharedSQLContext extends SQLTestUtils with SharedSparkSession {
--- End diff --

Does this work? Could we move `SharedSparkSession.scala` into this file?

```Scala
trait SharedSQLContext extends SQLTestUtils with SharedSparkSession
```
[GitHub] spark pull request #19529: [SPARK-22308] Support alternative unit testing st...
Github user gatorsmile commented on a diff in the pull request: https://github.com/apache/spark/pull/19529#discussion_r146976375

--- Diff: sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/PlanTest.scala ---
@@ -29,7 +31,14 @@ import org.apache.spark.sql.internal.SQLConf

 /**
  * Provides helper methods for comparing plans.
  */
-trait PlanTest extends SparkFunSuite with PredicateHelper {
+trait PlanTest extends SparkFunSuite with PlanTestBase {
+}
--- End diff --

Please remove the useless `{` and `}`
[GitHub] spark pull request #19529: [SPARK-22308] Support alternative unit testing st...
Github user gatorsmile commented on a diff in the pull request: https://github.com/apache/spark/pull/19529#discussion_r14693

--- Diff: sql/core/src/test/scala/org/apache/spark/sql/test/SharedSQLContext.scala ---
@@ -17,86 +17,8 @@ package org.apache.spark.sql.test

-import scala.concurrent.duration._
-
-import org.scalatest.BeforeAndAfterEach
-import org.scalatest.concurrent.Eventually
-
-import org.apache.spark.{DebugFilesystem, SparkConf}
-import org.apache.spark.sql.{SparkSession, SQLContext}
-import org.apache.spark.sql.internal.SQLConf
-
-/**
- * Helper trait for SQL test suites where all tests share a single [[TestSparkSession]].
- */
-trait SharedSQLContext extends SQLTestUtils with BeforeAndAfterEach with Eventually {
-
-  protected def sparkConf = {
-    new SparkConf()
-      .set("spark.hadoop.fs.file.impl", classOf[DebugFilesystem].getName)
-      .set("spark.unsafe.exceptionOnMemoryLeak", "true")
-      .set(SQLConf.CODEGEN_FALLBACK.key, "false")
-  }
-
-  /**
-   * The [[TestSparkSession]] to use for all tests in this suite.
-   *
-   * By default, the underlying [[org.apache.spark.SparkContext]] will be run in local
-   * mode with the default test configurations.
-   */
-  private var _spark: TestSparkSession = null
-
-  /**
-   * The [[TestSparkSession]] to use for all tests in this suite.
-   */
-  protected implicit def spark: SparkSession = _spark
-
-  /**
-   * The [[TestSQLContext]] to use for all tests in this suite.
-   */
-  protected implicit def sqlContext: SQLContext = _spark.sqlContext
-
-  protected def createSparkSession: TestSparkSession = {
-    new TestSparkSession(sparkConf)
-  }
-
-  /**
-   * Initialize the [[TestSparkSession]].
-   */
+trait SharedSQLContext extends SQLTestUtils with SharedSparkSession {
   protected override def beforeAll(): Unit = {
-    SparkSession.sqlListener.set(null)
-    if (_spark == null) {
-      _spark = createSparkSession
-    }
-    // Ensure we have initialized the context before calling parent code
     super.beforeAll()
--- End diff --

If this `beforeAll` is just calling `super.beforeAll`, why do we still need to override it?
[GitHub] spark pull request #19529: [SPARK-22308] Support alternative unit testing st...
Github user nkronenfeld commented on a diff in the pull request: https://github.com/apache/spark/pull/19529#discussion_r146887435

--- Diff: sql/core/src/test/scala/org/apache/spark/sql/test/SharedSQLContext.scala ---
@@ -17,86 +17,8 @@ package org.apache.spark.sql.test

-import scala.concurrent.duration._
-
-import org.scalatest.BeforeAndAfterEach
-import org.scalatest.concurrent.Eventually
-
-import org.apache.spark.{DebugFilesystem, SparkConf}
-import org.apache.spark.sql.{SparkSession, SQLContext}
-import org.apache.spark.sql.internal.SQLConf
-
-/**
- * Helper trait for SQL test suites where all tests share a single [[TestSparkSession]].
- */
-trait SharedSQLContext extends SQLTestUtils with BeforeAndAfterEach with Eventually {
-
-  protected def sparkConf = {
-    new SparkConf()
-      .set("spark.hadoop.fs.file.impl", classOf[DebugFilesystem].getName)
-      .set("spark.unsafe.exceptionOnMemoryLeak", "true")
-      .set(SQLConf.CODEGEN_FALLBACK.key, "false")
-  }
-
-  /**
-   * The [[TestSparkSession]] to use for all tests in this suite.
-   *
-   * By default, the underlying [[org.apache.spark.SparkContext]] will be run in local
-   * mode with the default test configurations.
-   */
-  private var _spark: TestSparkSession = null
-
-  /**
-   * The [[TestSparkSession]] to use for all tests in this suite.
-   */
-  protected implicit def spark: SparkSession = _spark
-
-  /**
-   * The [[TestSQLContext]] to use for all tests in this suite.
-   */
-  protected implicit def sqlContext: SQLContext = _spark.sqlContext
-
-  protected def createSparkSession: TestSparkSession = {
-    new TestSparkSession(sparkConf)
-  }
-
-  /**
-   * Initialize the [[TestSparkSession]].
-   */
+trait SharedSQLContext extends SQLTestUtils with SharedSparkSession {
   protected override def beforeAll(): Unit = {
--- End diff --

It's still used throughout the unit tests. I had noticed another issue related to that ([SPARK-15037]), but that is marked resolved without having gotten rid of this. Note that SharedSQLContext is to SharedSparkSession as PlanTest is to PlanTestBase - it extends FunSuite.
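For reference, the two pairs line up like this (extends clauses as they appear in the PR's diffs, bodies elided); in each pair, the base trait is style-agnostic and the original name keeps its FunSuite heritage for existing internal suites:

```scala
// Style-agnostic halves, mixable into any org.scalatest.Suite:
//   PlanTestBase, SharedSparkSession

// FunSuite-flavoured originals, kept for Spark's internal tests:
trait PlanTest extends SparkFunSuite with PlanTestBase
trait SharedSQLContext extends SQLTestUtils with SharedSparkSession
```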
[GitHub] spark pull request #19529: [SPARK-22308] Support alternative unit testing st...
Github user nkronenfeld commented on a diff in the pull request: https://github.com/apache/spark/pull/19529#discussion_r146885975

--- Diff: core/src/test/scala/org/apache/spark/SharedSparkContext.scala ---
@@ -29,10 +29,25 @@ trait SharedSparkContext extends BeforeAndAfterAll with BeforeAndAfterEach { sel

   var conf = new SparkConf(false)

+  /**
+   * Initialize the [[SparkContext]]. Generally, this is just called from
--- End diff --

fixed here, not elsewhere yet
[GitHub] spark pull request #19529: [SPARK-22308] Support alternative unit testing st...
Github user nkronenfeld commented on a diff in the pull request: https://github.com/apache/spark/pull/19529#discussion_r146885926

--- Diff: sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala ---
@@ -52,17 +55,142 @@ import org.apache.spark.util.{UninterruptibleThread, Utils}
  * Subclasses should *not* create `SQLContext`s in the test suite constructor, which is
  * prone to leaving multiple overlapping [[org.apache.spark.SparkContext]]s in the same JVM.
  */
-private[sql] trait SQLTestUtils
-  extends SparkFunSuite with Eventually
+private[sql] trait SQLTestUtils extends SparkFunSuite with SQLTestUtilsBase with PlanTest {
+  // Whether to materialize all test data before the first test is run
+  private var loadTestDataBeforeTests = false
+
+  protected override def beforeAll(): Unit = {
+    super.beforeAll()
+    if (loadTestDataBeforeTests) {
+      loadTestData()
+    }
+  }
+
+  /**
+   * Materialize the test data immediately after the `SQLContext` is set up.
+   * This is necessary if the data is accessed by name but not through direct reference.
+   */
+  protected def setupTestData(): Unit = {
+    loadTestDataBeforeTests = true
+  }
+
+  /**
+   * Disable stdout and stderr when running the test. To not output the logs to the console,
+   * ConsoleAppender's `follow` should be set to `true` so that it will honor reassignments of
+   * System.out or System.err. Otherwise, ConsoleAppender will still output to the console even if
+   * we change System.out and System.err.
+   */
+  protected def testQuietly(name: String)(f: => Unit): Unit = {
+    test(name) {
+      quietly {
+        f
+      }
+    }
+  }
+
+  /**
+   * Run a test on a separate `UninterruptibleThread`.
+   */
+  protected def testWithUninterruptibleThread(name: String, quietly: Boolean = false)
+    (body: => Unit): Unit = {
+    val timeoutMillis = 10000
+    @transient var ex: Throwable = null
+
+    def runOnThread(): Unit = {
+      val thread = new UninterruptibleThread(s"Testing thread for test $name") {
+        override def run(): Unit = {
+          try {
+            body
+          } catch {
+            case NonFatal(e) =>
+              ex = e
+          }
+        }
+      }
+      thread.setDaemon(true)
+      thread.start()
+      thread.join(timeoutMillis)
+      if (thread.isAlive) {
+        thread.interrupt()
+        // If this interrupt does not work, then this thread is most likely running something that
+        // is not interruptible. There is not much point to wait for the thread to terminate, and
+        // we rather let the JVM terminate the thread on exit.
+        fail(
+          s"Test '$name' running on o.a.s.util.UninterruptibleThread timed out after" +
+            s" $timeoutMillis ms")
+      } else if (ex != null) {
+        throw ex
+      }
+    }
+
+    if (quietly) {
+      testQuietly(name) { runOnThread() }
+    } else {
+      test(name) { runOnThread() }
+    }
+  }
+}
+
+private[sql] object SQLTestUtils {
--- End diff --

done
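For context, a hedged sketch of how the two helpers in the diff above are meant to be called from a suite that mixes in SQLTestUtils (the suite name and queries here are invented for illustration):

```scala
import org.apache.spark.sql.QueryTest
import org.apache.spark.sql.test.SharedSQLContext

class ExampleQuerySuite extends QueryTest with SharedSQLContext {

  // Runs the body with stdout/stderr suppressed.
  testQuietly("noisy job") {
    spark.range(10).collect()
  }

  // Runs the body on a separate UninterruptibleThread, failing the test if
  // it does not finish within the helper's timeout.
  testWithUninterruptibleThread("interrupt-sensitive job", quietly = true) {
    spark.range(10).collect()
  }
}
```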
[GitHub] spark pull request #19529: [SPARK-22308] Support alternative unit testing st...
Github user jiangxb1987 commented on a diff in the pull request: https://github.com/apache/spark/pull/19529#discussion_r146591649

--- Diff: core/src/test/scala/org/apache/spark/SharedSparkContext.scala ---
@@ -29,10 +29,25 @@ trait SharedSparkContext extends BeforeAndAfterAll with BeforeAndAfterEach { sel

   var conf = new SparkConf(false)

+  /**
+   * Initialize the [[SparkContext]]. Generally, this is just called from
--- End diff --

nit: The max length of each line in the comment should also be 100; it seems you are limiting it to 80?
[GitHub] spark pull request #19529: [SPARK-22308] Support alternative unit testing st...
Github user jiangxb1987 commented on a diff in the pull request: https://github.com/apache/spark/pull/19529#discussion_r146595869

--- Diff: sql/core/src/test/scala/org/apache/spark/sql/test/SharedSQLContext.scala ---
@@ -17,86 +17,8 @@ package org.apache.spark.sql.test

-import scala.concurrent.duration._
-
-import org.scalatest.BeforeAndAfterEach
-import org.scalatest.concurrent.Eventually
-
-import org.apache.spark.{DebugFilesystem, SparkConf}
-import org.apache.spark.sql.{SparkSession, SQLContext}
-import org.apache.spark.sql.internal.SQLConf
-
-/**
- * Helper trait for SQL test suites where all tests share a single [[TestSparkSession]].
- */
-trait SharedSQLContext extends SQLTestUtils with BeforeAndAfterEach with Eventually {
-
-  protected def sparkConf = {
-    new SparkConf()
-      .set("spark.hadoop.fs.file.impl", classOf[DebugFilesystem].getName)
-      .set("spark.unsafe.exceptionOnMemoryLeak", "true")
-      .set(SQLConf.CODEGEN_FALLBACK.key, "false")
-  }
-
-  /**
-   * The [[TestSparkSession]] to use for all tests in this suite.
-   *
-   * By default, the underlying [[org.apache.spark.SparkContext]] will be run in local
-   * mode with the default test configurations.
-   */
-  private var _spark: TestSparkSession = null
-
-  /**
-   * The [[TestSparkSession]] to use for all tests in this suite.
-   */
-  protected implicit def spark: SparkSession = _spark
-
-  /**
-   * The [[TestSQLContext]] to use for all tests in this suite.
-   */
-  protected implicit def sqlContext: SQLContext = _spark.sqlContext
-
-  protected def createSparkSession: TestSparkSession = {
-    new TestSparkSession(sparkConf)
-  }
-
-  /**
-   * Initialize the [[TestSparkSession]].
-   */
+trait SharedSQLContext extends SQLTestUtils with SharedSparkSession {
   protected override def beforeAll(): Unit = {
--- End diff --

Why do we still need this?
[GitHub] spark pull request #19529: [SPARK-22308] Support alternative unit testing st...
Github user jiangxb1987 commented on a diff in the pull request: https://github.com/apache/spark/pull/19529#discussion_r146595345

--- Diff: sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala ---
@@ -52,17 +55,142 @@ import org.apache.spark.util.{UninterruptibleThread, Utils}
  * Subclasses should *not* create `SQLContext`s in the test suite constructor, which is
  * prone to leaving multiple overlapping [[org.apache.spark.SparkContext]]s in the same JVM.
  */
-private[sql] trait SQLTestUtils
-  extends SparkFunSuite with Eventually
+private[sql] trait SQLTestUtils extends SparkFunSuite with SQLTestUtilsBase with PlanTest {
+  // Whether to materialize all test data before the first test is run
+  private var loadTestDataBeforeTests = false
+
+  protected override def beforeAll(): Unit = {
+    super.beforeAll()
+    if (loadTestDataBeforeTests) {
+      loadTestData()
+    }
+  }
+
+  /**
+   * Materialize the test data immediately after the `SQLContext` is set up.
+   * This is necessary if the data is accessed by name but not through direct reference.
+   */
+  protected def setupTestData(): Unit = {
+    loadTestDataBeforeTests = true
+  }
+
+  /**
+   * Disable stdout and stderr when running the test. To not output the logs to the console,
+   * ConsoleAppender's `follow` should be set to `true` so that it will honor reassignments of
+   * System.out or System.err. Otherwise, ConsoleAppender will still output to the console even if
+   * we change System.out and System.err.
+   */
+  protected def testQuietly(name: String)(f: => Unit): Unit = {
+    test(name) {
+      quietly {
+        f
+      }
+    }
+  }
+
+  /**
+   * Run a test on a separate `UninterruptibleThread`.
+   */
+  protected def testWithUninterruptibleThread(name: String, quietly: Boolean = false)
+    (body: => Unit): Unit = {
+    val timeoutMillis = 10000
+    @transient var ex: Throwable = null
+
+    def runOnThread(): Unit = {
+      val thread = new UninterruptibleThread(s"Testing thread for test $name") {
+        override def run(): Unit = {
+          try {
+            body
+          } catch {
+            case NonFatal(e) =>
+              ex = e
+          }
+        }
+      }
+      thread.setDaemon(true)
+      thread.start()
+      thread.join(timeoutMillis)
+      if (thread.isAlive) {
+        thread.interrupt()
+        // If this interrupt does not work, then this thread is most likely running something that
+        // is not interruptible. There is not much point to wait for the thread to terminate, and
+        // we rather let the JVM terminate the thread on exit.
+        fail(
+          s"Test '$name' running on o.a.s.util.UninterruptibleThread timed out after" +
+            s" $timeoutMillis ms")
+      } else if (ex != null) {
+        throw ex
+      }
+    }
+
+    if (quietly) {
+      testQuietly(name) { runOnThread() }
+    } else {
+      test(name) { runOnThread() }
+    }
+  }
+}
+
+private[sql] object SQLTestUtils {
--- End diff --

Could you move this after `SQLTestUtilsBase`?
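For reference, a minimal sketch of the layout being requested for SQLTestUtils.scala (extends clauses abbreviated from the PR's diffs, bodies elided):

```scala
// FunSuite-dependent half: setupTestData, testQuietly, testWithUninterruptibleThread, ...
private[sql] trait SQLTestUtils extends SparkFunSuite with SQLTestUtilsBase with PlanTest {
  // ...
}

// Style-agnostic half, shared with external suites.
private[sql] trait SQLTestUtilsBase extends Eventually with BeforeAndAfterAll with SQLTestData
  with PlanTestBase { self: Suite =>
  // ...
}

// Static helper methods, placed after both traits.
private[sql] object SQLTestUtils {
  // ...
}
```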
[GitHub] spark pull request #19529: [SPARK-22308] Support alternative unit testing st...
Github user nkronenfeld commented on a diff in the pull request: https://github.com/apache/spark/pull/19529#discussion_r146312166

--- Diff: sql/core/src/test/scala/org/apache/spark/sql/test/GenericFlatSpecSuite.scala ---
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.test
+
+import org.scalatest.FlatSpec
+
+/**
+ * The purpose of this suite is to make sure that generic FlatSpec-based scala
+ * tests work with a shared spark session
+ */
+class GenericFlatSpecSuite extends FlatSpec with SharedSparkSession {
--- End diff --

Yeah, I wasn't totally sure about that either, but I eventually came to the conclusion that they are useful. I think of it more in terms of preventing regressions; the case they will prevent is the FunSuite dependency creeping back into SharedSparkSession. For similar reasons, I should probably put similar tests in for SharedSparkContext.
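Following that reasoning, a similar guard for SharedSparkContext might look roughly like the following hypothetical sketch (suite name and assertion invented here; not part of this PR):

```scala
import org.scalatest.FlatSpec

import org.apache.spark.SharedSparkContext

// Regression guard: this stops compiling if SharedSparkContext ever grows a
// FunSuite dependency again, because FlatSpec is not a FunSuite.
class GenericSparkContextFlatSpecSuite extends FlatSpec with SharedSparkContext {
  "A shared SparkContext" should "be usable from a FlatSpec-style suite" in {
    assert(sc.parallelize(1 to 4).sum() == 10)
  }
}
```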
[GitHub] spark pull request #19529: [SPARK-22308] Support alternative unit testing st...
Github user nkronenfeld commented on a diff in the pull request: https://github.com/apache/spark/pull/19529#discussion_r146311684

--- Diff: sql/core/src/test/scala/org/apache/spark/sql/test/SharedSparkSession.scala ---
@@ -0,0 +1,119 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.test
+
+import scala.concurrent.duration._
+
+import org.scalatest.{BeforeAndAfterEach, Suite}
+import org.scalatest.concurrent.Eventually
+
+import org.apache.spark.{DebugFilesystem, SparkConf}
+import org.apache.spark.sql.{SparkSession, SQLContext}
+import org.apache.spark.sql.internal.SQLConf
+
+/**
+ * Helper trait for SQL test suites where all tests share a single [[TestSparkSession]].
+ */
+trait SharedSparkSession
--- End diff --

I'm not sure what you're asking. It's specific to the Spark SQL package - in that SparkSession resides there - but not specific to using the SQL language. When I say 'SQL test suites', I'm referring to the former.
[GitHub] spark pull request #19529: [SPARK-22308] Support alternative unit testing st...
Github user nkronenfeld commented on a diff in the pull request: https://github.com/apache/spark/pull/19529#discussion_r146311180

--- Diff: sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala ---
@@ -52,249 +36,23 @@ import org.apache.spark.util.{UninterruptibleThread, Utils}
  * Subclasses should *not* create `SQLContext`s in the test suite constructor, which is
  * prone to leaving multiple overlapping [[org.apache.spark.SparkContext]]s in the same JVM.
  */
-private[sql] trait SQLTestUtils
-  extends SparkFunSuite with Eventually
-  with BeforeAndAfterAll
-  with SQLTestData
-  with PlanTest { self =>
-
-  protected def sparkContext = spark.sparkContext
-
+private[sql] trait SQLTestUtils extends SparkFunSuite with SQLTestUtilsBase with PlanTest {
--- End diff --

This has more changes - I left the data initialization stuff in SQLTestUtils, as it wouldn't be needed by external tests. All the <'s below are things that I pulled out of SQLTestUtilsBase and put back into SQLTestUtils. Running similarly:

    git checkout 4a779bdac3e75c17b7d36c5a009ba6c948fa9fb6 SQLTestUtils.scala
    diff SQLTestUtils.scala SQLTestUtilsBase.scala

I get

    27d26
    < import scala.util.control.NonFatal
    30c29
    < import org.scalatest.BeforeAndAfterAll
    ---
    > import org.scalatest.{BeforeAndAfterAll, Suite}
    33d31
    < import org.apache.spark.SparkFunSuite
    38c36
    < import org.apache.spark.sql.catalyst.plans.PlanTest
    ---
    > import org.apache.spark.sql.catalyst.plans.PlanTestBase
    40d37
    < import org.apache.spark.sql.catalyst.util._
    43c40
    < import org.apache.spark.util.{UninterruptibleThread, Utils}
    ---
    > import org.apache.spark.util.Utils
    46c43
    <  * Helper trait that should be extended by all SQL test suites.
    ---
    >  * Helper trait that can be extended by all external SQL test suites.
    48,49c45
    <  * This allows subclasses to plugin a custom `SQLContext`. It comes with test data
    <  * prepared in advance as well as all implicit conversions used extensively by dataframes.
    ---
    >  * This allows subclasses to plugin a custom `SQLContext`.
    55,56c51,52
    < private[sql] trait SQLTestUtils
    <   extends SparkFunSuite with Eventually
    ---
    > private[sql] trait SQLTestUtilsBase
    >   extends Eventually
    59c55
    <   with PlanTest { self =>
    ---
    >   with PlanTestBase { self: Suite =>
    63,65d58
    <   // Whether to materialize all test data before the first test is run
    <   private var loadTestDataBeforeTests = false
    <
    80,94d72
    <   /**
    <    * Materialize the test data immediately after the `SQLContext` is set up.
    <    * This is necessary if the data is accessed by name but not through direct reference.
    <    */
    <   protected def setupTestData(): Unit = {
    <     loadTestDataBeforeTests = true
    <   }
    <
    <   protected override def beforeAll(): Unit = {
    <     super.beforeAll()
    <     if (loadTestDataBeforeTests) {
    <       loadTestData()
    <     }
    <   }
    <
    300,354d277
    <   /**
    <    * Disable stdout and stderr when running the test. To not output the logs to the console,
    <    * ConsoleAppender's `follow` should be set to `true` so that it will honor reassignments of
    <    * System.out or System.err. Otherwise, ConsoleAppender will still output to the console even if
    <    * we change System.out and System.err.
    <    */
    <   protected def testQuietly(name: String)(f: => Unit): Unit = {
    <     test(name) {
    <       quietly {
    <         f
    <       }
    <     }
    <   }
    <
    <   /**
    <    * Run a test on a separate `UninterruptibleThread`.
    <    */
    <   protected def testWithUninterruptibleThread(name: String, quietly: Boolean = false)
    <     (body: => Unit): Unit = {
    <     val timeoutMillis = 10000
    <     @transient var ex: Throwable = null
    <
    <     def runOnThread(): Unit = {
    <       val thread = new UninterruptibleThread(s"Testing thread for test $name") {
    <         override def run(): Unit = {
    <           try {
    <             body
    <           } catch {
    <             case NonFatal(e) =>
    <               ex = e
    <           }
    <         }
    <       }
    <       thread.setDaemon(true)
    <       thread.start()
    <       thread.join(timeoutMillis)
    <       if (thread.isAlive) {
    <         thread.interrupt()
    <         // If this i
[GitHub] spark pull request #19529: [SPARK-22308] Support alternative unit testing st...
Github user nkronenfeld commented on a diff in the pull request: https://github.com/apache/spark/pull/19529#discussion_r146308234

--- Diff: sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/PlanTest.scala ---
@@ -18,158 +18,9 @@ package org.apache.spark.sql.catalyst.plans

 import org.apache.spark.SparkFunSuite
-import org.apache.spark.sql.AnalysisException
-import org.apache.spark.sql.catalyst.analysis.SimpleAnalyzer
-import org.apache.spark.sql.catalyst.expressions._
-import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression
-import org.apache.spark.sql.catalyst.plans.logical._
-import org.apache.spark.sql.catalyst.util._
-import org.apache.spark.sql.internal.SQLConf

 /**
  * Provides helper methods for comparing plans.
  */
-trait PlanTest extends SparkFunSuite with PredicateHelper {
-
-  // TODO(gatorsmile): remove this from PlanTest and all the analyzer rules
-  protected def conf = SQLConf.get
-
-  /**
-   * Since attribute references are given globally unique ids during analysis,
-   * we must normalize them to check if two different queries are identical.
-   */
-  protected def normalizeExprIds(plan: LogicalPlan) = {
-    plan transformAllExpressions {
-      case s: ScalarSubquery =>
-        s.copy(exprId = ExprId(0))
-      case e: Exists =>
-        e.copy(exprId = ExprId(0))
-      case l: ListQuery =>
-        l.copy(exprId = ExprId(0))
-      case a: AttributeReference =>
-        AttributeReference(a.name, a.dataType, a.nullable)(exprId = ExprId(0))
-      case a: Alias =>
-        Alias(a.child, a.name)(exprId = ExprId(0))
-      case ae: AggregateExpression =>
-        ae.copy(resultId = ExprId(0))
-    }
-  }
-
-  /**
-   * Normalizes plans:
-   * - Filter the filter conditions that appear in a plan. For instance,
-   *   ((expr 1 && expr 2) && expr 3), (expr 1 && expr 2 && expr 3), (expr 3 && (expr 1 && expr 2))
-   *   etc., will all now be equivalent.
-   * - Sample the seed will be replaced by 0L.
-   * - Join conditions will be resorted by hashCode.
-   */
-  protected def normalizePlan(plan: LogicalPlan): LogicalPlan = {
-    plan transform {
-      case Filter(condition: Expression, child: LogicalPlan) =>
-        Filter(splitConjunctivePredicates(condition).map(rewriteEqual).sortBy(_.hashCode())
-          .reduce(And), child)
-      case sample: Sample =>
-        sample.copy(seed = 0L)
-      case Join(left, right, joinType, condition) if condition.isDefined =>
-        val newCondition =
-          splitConjunctivePredicates(condition.get).map(rewriteEqual).sortBy(_.hashCode())
-            .reduce(And)
-        Join(left, right, joinType, Some(newCondition))
-    }
-  }
-
-  /**
-   * Rewrite [[EqualTo]] and [[EqualNullSafe]] operator to keep order. The following cases will be
-   * equivalent:
-   * 1. (a = b), (b = a);
-   * 2. (a <=> b), (b <=> a).
-   */
-  private def rewriteEqual(condition: Expression): Expression = condition match {
-    case eq @ EqualTo(l: Expression, r: Expression) =>
-      Seq(l, r).sortBy(_.hashCode()).reduce(EqualTo)
-    case eq @ EqualNullSafe(l: Expression, r: Expression) =>
-      Seq(l, r).sortBy(_.hashCode()).reduce(EqualNullSafe)
-    case _ => condition // Don't reorder.
-  }
-
-  /** Fails the test if the two plans do not match */
-  protected def comparePlans(
-      plan1: LogicalPlan,
-      plan2: LogicalPlan,
-      checkAnalysis: Boolean = true): Unit = {
-    if (checkAnalysis) {
-      // Make sure both plan pass checkAnalysis.
-      SimpleAnalyzer.checkAnalysis(plan1)
-      SimpleAnalyzer.checkAnalysis(plan2)
-    }
-
-    val normalized1 = normalizePlan(normalizeExprIds(plan1))
-    val normalized2 = normalizePlan(normalizeExprIds(plan2))
-    if (normalized1 != normalized2) {
-      fail(
-        s"""
-          |== FAIL: Plans do not match ===
-          |${sideBySide(normalized1.treeString, normalized2.treeString).mkString("\n")}
-         """.stripMargin)
-    }
-  }
-
-  /** Fails the test if the two expressions do not match */
-  protected def compareExpressions(e1: Expression, e2: Expression): Unit = {
-    comparePlans(Filter(e1, OneRowRelation()), Filter(e2, OneRowRelation()), checkAnalysis = false)
-  }
-
-  /** Fails the test if the join order in the two plans do not match */
-  protected def compareJoinOrder(plan1: LogicalPlan, plan2: LogicalPlan) {
-    val normalized1 = normalizePlan(normalizeExprIds(plan1))
-    val normalized2 = normalizePlan(normalizeExprIds(plan2))
-    if (!sameJoinPlan(normalized1, normalized2))
[GitHub] spark pull request #19529: [SPARK-22308] Support alternative unit testing st...
Github user srowen commented on a diff in the pull request: https://github.com/apache/spark/pull/19529#discussion_r146186223

--- Diff: sql/core/src/test/scala/org/apache/spark/sql/test/SharedSparkSession.scala ---
@@ -0,0 +1,119 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.test
+
+import scala.concurrent.duration._
+
+import org.scalatest.{BeforeAndAfterEach, Suite}
+import org.scalatest.concurrent.Eventually
+
+import org.apache.spark.{DebugFilesystem, SparkConf}
+import org.apache.spark.sql.{SparkSession, SQLContext}
+import org.apache.spark.sql.internal.SQLConf
+
+/**
+ * Helper trait for SQL test suites where all tests share a single [[TestSparkSession]].
+ */
+trait SharedSparkSession
--- End diff --

Is this actually specific to SQL? And is it really for symmetry with SharedSparkContext?
[GitHub] spark pull request #19529: [SPARK-22308] Support alternative unit testing st...
Github user srowen commented on a diff in the pull request: https://github.com/apache/spark/pull/19529#discussion_r146185400

--- Diff: sql/core/src/test/scala/org/apache/spark/sql/test/GenericFlatSpecSuite.scala ---
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.test
+
+import org.scalatest.FlatSpec
+
+/**
+ * The purpose of this suite is to make sure that generic FlatSpec-based scala
+ * tests work with a shared spark session
+ */
+class GenericFlatSpecSuite extends FlatSpec with SharedSparkSession {
--- End diff --

These are testing tests? I get it, but I don't know if it's necessary. If FlatSpec didn't work, scalatest would hopefully catch it. If initializeSession / SharedSparkSession didn't, presumably tons of tests wouldn't work.
[GitHub] spark pull request #19529: [SPARK-22308] Support alternative unit testing st...
Github user srowen commented on a diff in the pull request: https://github.com/apache/spark/pull/19529#discussion_r146185476

--- Diff: sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala ---
@@ -52,249 +36,23 @@ import org.apache.spark.util.{UninterruptibleThread, Utils}
  * Subclasses should *not* create `SQLContext`s in the test suite constructor, which is
  * prone to leaving multiple overlapping [[org.apache.spark.SparkContext]]s in the same JVM.
  */
-private[sql] trait SQLTestUtils
-  extends SparkFunSuite with Eventually
-  with BeforeAndAfterAll
-  with SQLTestData
-  with PlanTest { self =>
-
-  protected def sparkContext = spark.sparkContext
-
+private[sql] trait SQLTestUtils extends SparkFunSuite with SQLTestUtilsBase with PlanTest {
--- End diff --

Same general comment as PlanTestBase vs PlanTest above
[GitHub] spark pull request #19529: [SPARK-22308] Support alternative unit testing st...
Github user srowen commented on a diff in the pull request: https://github.com/apache/spark/pull/19529#discussion_r146185226

--- Diff: sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/PlanTest.scala ---
@@ -18,158 +18,9 @@ package org.apache.spark.sql.catalyst.plans

 import org.apache.spark.SparkFunSuite
-import org.apache.spark.sql.AnalysisException
-import org.apache.spark.sql.catalyst.analysis.SimpleAnalyzer
-import org.apache.spark.sql.catalyst.expressions._
-import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression
-import org.apache.spark.sql.catalyst.plans.logical._
-import org.apache.spark.sql.catalyst.util._
-import org.apache.spark.sql.internal.SQLConf

 /**
  * Provides helper methods for comparing plans.
  */
-trait PlanTest extends SparkFunSuite with PredicateHelper {
-
-  // TODO(gatorsmile): remove this from PlanTest and all the analyzer rules
-  protected def conf = SQLConf.get
-
-  /**
-   * Since attribute references are given globally unique ids during analysis,
-   * we must normalize them to check if two different queries are identical.
-   */
-  protected def normalizeExprIds(plan: LogicalPlan) = {
-    plan transformAllExpressions {
-      case s: ScalarSubquery =>
-        s.copy(exprId = ExprId(0))
-      case e: Exists =>
-        e.copy(exprId = ExprId(0))
-      case l: ListQuery =>
-        l.copy(exprId = ExprId(0))
-      case a: AttributeReference =>
-        AttributeReference(a.name, a.dataType, a.nullable)(exprId = ExprId(0))
-      case a: Alias =>
-        Alias(a.child, a.name)(exprId = ExprId(0))
-      case ae: AggregateExpression =>
-        ae.copy(resultId = ExprId(0))
-    }
-  }
-
-  /**
-   * Normalizes plans:
-   * - Filter the filter conditions that appear in a plan. For instance,
-   *   ((expr 1 && expr 2) && expr 3), (expr 1 && expr 2 && expr 3), (expr 3 && (expr 1 && expr 2))
-   *   etc., will all now be equivalent.
-   * - Sample the seed will be replaced by 0L.
-   * - Join conditions will be resorted by hashCode.
-   */
-  protected def normalizePlan(plan: LogicalPlan): LogicalPlan = {
-    plan transform {
-      case Filter(condition: Expression, child: LogicalPlan) =>
-        Filter(splitConjunctivePredicates(condition).map(rewriteEqual).sortBy(_.hashCode())
-          .reduce(And), child)
-      case sample: Sample =>
-        sample.copy(seed = 0L)
-      case Join(left, right, joinType, condition) if condition.isDefined =>
-        val newCondition =
-          splitConjunctivePredicates(condition.get).map(rewriteEqual).sortBy(_.hashCode())
-            .reduce(And)
-        Join(left, right, joinType, Some(newCondition))
-    }
-  }
-
-  /**
-   * Rewrite [[EqualTo]] and [[EqualNullSafe]] operator to keep order. The following cases will be
-   * equivalent:
-   * 1. (a = b), (b = a);
-   * 2. (a <=> b), (b <=> a).
-   */
-  private def rewriteEqual(condition: Expression): Expression = condition match {
-    case eq @ EqualTo(l: Expression, r: Expression) =>
-      Seq(l, r).sortBy(_.hashCode()).reduce(EqualTo)
-    case eq @ EqualNullSafe(l: Expression, r: Expression) =>
-      Seq(l, r).sortBy(_.hashCode()).reduce(EqualNullSafe)
-    case _ => condition // Don't reorder.
-  }
-
-  /** Fails the test if the two plans do not match */
-  protected def comparePlans(
-      plan1: LogicalPlan,
-      plan2: LogicalPlan,
-      checkAnalysis: Boolean = true): Unit = {
-    if (checkAnalysis) {
-      // Make sure both plan pass checkAnalysis.
-      SimpleAnalyzer.checkAnalysis(plan1)
-      SimpleAnalyzer.checkAnalysis(plan2)
-    }
-
-    val normalized1 = normalizePlan(normalizeExprIds(plan1))
-    val normalized2 = normalizePlan(normalizeExprIds(plan2))
-    if (normalized1 != normalized2) {
-      fail(
-        s"""
-          |== FAIL: Plans do not match ===
-          |${sideBySide(normalized1.treeString, normalized2.treeString).mkString("\n")}
-         """.stripMargin)
-    }
-  }
-
-  /** Fails the test if the two expressions do not match */
-  protected def compareExpressions(e1: Expression, e2: Expression): Unit = {
-    comparePlans(Filter(e1, OneRowRelation()), Filter(e2, OneRowRelation()), checkAnalysis = false)
-  }
-
-  /** Fails the test if the join order in the two plans do not match */
-  protected def compareJoinOrder(plan1: LogicalPlan, plan2: LogicalPlan) {
-    val normalized1 = normalizePlan(normalizeExprIds(plan1))
-    val normalized2 = normalizePlan(normalizeExprIds(plan2))
-    if (!sameJoinPlan(normalized1, normalized2)) {