[GitHub] spark pull request #19529: [SPARK-22308] Support alternative unit testing st...

2017-10-26 Thread asfgit
Github user asfgit closed the pull request at:

https://github.com/apache/spark/pull/19529


---




[GitHub] spark pull request #19529: [SPARK-22308] Support alternative unit testing st...

2017-10-25 Thread nkronenfeld
Github user nkronenfeld commented on a diff in the pull request:

https://github.com/apache/spark/pull/19529#discussion_r147038647
  
--- Diff: sql/core/src/test/scala/org/apache/spark/sql/test/SharedSQLContext.scala ---
@@ -17,86 +17,8 @@
 
 package org.apache.spark.sql.test
 
-import scala.concurrent.duration._
-
-import org.scalatest.BeforeAndAfterEach
-import org.scalatest.concurrent.Eventually
-
-import org.apache.spark.{DebugFilesystem, SparkConf}
-import org.apache.spark.sql.{SparkSession, SQLContext}
-import org.apache.spark.sql.internal.SQLConf
-
-/**
- * Helper trait for SQL test suites where all tests share a single [[TestSparkSession]].
- */
-trait SharedSQLContext extends SQLTestUtils with BeforeAndAfterEach with Eventually {
-
-  protected def sparkConf = {
-    new SparkConf()
-      .set("spark.hadoop.fs.file.impl", classOf[DebugFilesystem].getName)
-      .set("spark.unsafe.exceptionOnMemoryLeak", "true")
-      .set(SQLConf.CODEGEN_FALLBACK.key, "false")
-  }
-
-  /**
-   * The [[TestSparkSession]] to use for all tests in this suite.
-   *
-   * By default, the underlying [[org.apache.spark.SparkContext]] will be run in local
-   * mode with the default test configurations.
-   */
-  private var _spark: TestSparkSession = null
-
-  /**
-   * The [[TestSparkSession]] to use for all tests in this suite.
-   */
-  protected implicit def spark: SparkSession = _spark
-
-  /**
-   * The [[TestSQLContext]] to use for all tests in this suite.
-   */
-  protected implicit def sqlContext: SQLContext = _spark.sqlContext
-
-  protected def createSparkSession: TestSparkSession = {
-    new TestSparkSession(sparkConf)
-  }
-
-  /**
-   * Initialize the [[TestSparkSession]].
-   */
+trait SharedSQLContext extends SQLTestUtils with SharedSparkSession {
--- End diff --

We could... that would better fit the pattern of what we've now done for PlanTest/PlanTestBase and SQLTestUtils/SQLTestUtilsBase.

I hesitated in this case just because the two are conceptually quite different, and the idea is that both would actually get used - SharedSQLContext in internal tests, SharedSparkSession in external tests.
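
To make the intended split concrete, here is a minimal sketch (the suite names and assertions are hypothetical, illustrative only):

```Scala
import org.scalatest.FlatSpec

// Internal Spark test: SharedSQLContext already mixes in SparkFunSuite,
// so the suite is FunSuite-style by construction.
class InternalExampleSuite extends SharedSQLContext {
  test("the shared session is available") {
    assert(spark.range(10).count() === 10)
  }
}

// External test: SharedSparkSession is style-agnostic, so any ScalaTest
// style (FlatSpec here) can host it.
class ExternalExampleSpec extends FlatSpec with SharedSparkSession {
  "A shared session" should "be available from FlatSpec tests" in {
    assert(spark.range(10).count() === 10)
  }
}
```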


---




[GitHub] spark pull request #19529: [SPARK-22308] Support alternative unit testing st...

2017-10-25 Thread nkronenfeld
Github user nkronenfeld commented on a diff in the pull request:

https://github.com/apache/spark/pull/19529#discussion_r147038487
  
--- Diff: sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/PlanTest.scala ---
@@ -29,7 +31,14 @@ import org.apache.spark.sql.internal.SQLConf
 /**
  * Provides helper methods for comparing plans.
  */
-trait PlanTest extends SparkFunSuite with PredicateHelper {
+trait PlanTest extends SparkFunSuite with PlanTestBase {
+}
--- End diff --

done


---




[GitHub] spark pull request #19529: [SPARK-22308] Support alternative unit testing st...

2017-10-25 Thread nkronenfeld
Github user nkronenfeld commented on a diff in the pull request:

https://github.com/apache/spark/pull/19529#discussion_r147038504
  
--- Diff: sql/core/src/test/scala/org/apache/spark/sql/test/SharedSQLContext.scala ---
@@ -17,86 +17,8 @@
 
 package org.apache.spark.sql.test
 
-import scala.concurrent.duration._
-
-import org.scalatest.BeforeAndAfterEach
-import org.scalatest.concurrent.Eventually
-
-import org.apache.spark.{DebugFilesystem, SparkConf}
-import org.apache.spark.sql.{SparkSession, SQLContext}
-import org.apache.spark.sql.internal.SQLConf
-
-/**
- * Helper trait for SQL test suites where all tests share a single [[TestSparkSession]].
- */
-trait SharedSQLContext extends SQLTestUtils with BeforeAndAfterEach with Eventually {
-
-  protected def sparkConf = {
-    new SparkConf()
-      .set("spark.hadoop.fs.file.impl", classOf[DebugFilesystem].getName)
-      .set("spark.unsafe.exceptionOnMemoryLeak", "true")
-      .set(SQLConf.CODEGEN_FALLBACK.key, "false")
-  }
-
-  /**
-   * The [[TestSparkSession]] to use for all tests in this suite.
-   *
-   * By default, the underlying [[org.apache.spark.SparkContext]] will be run in local
-   * mode with the default test configurations.
-   */
-  private var _spark: TestSparkSession = null
-
-  /**
-   * The [[TestSparkSession]] to use for all tests in this suite.
-   */
-  protected implicit def spark: SparkSession = _spark
-
-  /**
-   * The [[TestSQLContext]] to use for all tests in this suite.
-   */
-  protected implicit def sqlContext: SQLContext = _spark.sqlContext
-
-  protected def createSparkSession: TestSparkSession = {
-    new TestSparkSession(sparkConf)
-  }
-
-  /**
-   * Initialize the [[TestSparkSession]].
-   */
+trait SharedSQLContext extends SQLTestUtils with SharedSparkSession {
   protected override def beforeAll(): Unit = {
-    SparkSession.sqlListener.set(null)
-    if (_spark == null) {
-      _spark = createSparkSession
-    }
-    // Ensure we have initialized the context before calling parent code
     super.beforeAll()
--- End diff --

we don't.  Removed.


---




[GitHub] spark pull request #19529: [SPARK-22308] Support alternative unit testing st...

2017-10-25 Thread gatorsmile
Github user gatorsmile commented on a diff in the pull request:

https://github.com/apache/spark/pull/19529#discussion_r146978824
  
--- Diff: sql/core/src/test/scala/org/apache/spark/sql/test/SharedSQLContext.scala ---
@@ -17,86 +17,8 @@
 
 package org.apache.spark.sql.test
 
-import scala.concurrent.duration._
-
-import org.scalatest.BeforeAndAfterEach
-import org.scalatest.concurrent.Eventually
-
-import org.apache.spark.{DebugFilesystem, SparkConf}
-import org.apache.spark.sql.{SparkSession, SQLContext}
-import org.apache.spark.sql.internal.SQLConf
-
-/**
- * Helper trait for SQL test suites where all tests share a single [[TestSparkSession]].
- */
-trait SharedSQLContext extends SQLTestUtils with BeforeAndAfterEach with Eventually {
-
-  protected def sparkConf = {
-    new SparkConf()
-      .set("spark.hadoop.fs.file.impl", classOf[DebugFilesystem].getName)
-      .set("spark.unsafe.exceptionOnMemoryLeak", "true")
-      .set(SQLConf.CODEGEN_FALLBACK.key, "false")
-  }
-
-  /**
-   * The [[TestSparkSession]] to use for all tests in this suite.
-   *
-   * By default, the underlying [[org.apache.spark.SparkContext]] will be run in local
-   * mode with the default test configurations.
-   */
-  private var _spark: TestSparkSession = null
-
-  /**
-   * The [[TestSparkSession]] to use for all tests in this suite.
-   */
-  protected implicit def spark: SparkSession = _spark
-
-  /**
-   * The [[TestSQLContext]] to use for all tests in this suite.
-   */
-  protected implicit def sqlContext: SQLContext = _spark.sqlContext
-
-  protected def createSparkSession: TestSparkSession = {
-    new TestSparkSession(sparkConf)
-  }
-
-  /**
-   * Initialize the [[TestSparkSession]].
-   */
+trait SharedSQLContext extends SQLTestUtils with SharedSparkSession {
--- End diff --

Does this work? Could we move `SharedSparkSession.scala` into this file?

```Scala
trait SharedSQLContext extends SQLTestUtils with SharedSparkSession
```


---




[GitHub] spark pull request #19529: [SPARK-22308] Support alternative unit testing st...

2017-10-25 Thread gatorsmile
Github user gatorsmile commented on a diff in the pull request:

https://github.com/apache/spark/pull/19529#discussion_r146976375
  
--- Diff: sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/PlanTest.scala ---
@@ -29,7 +31,14 @@ import org.apache.spark.sql.internal.SQLConf
 /**
  * Provides helper methods for comparing plans.
  */
-trait PlanTest extends SparkFunSuite with PredicateHelper {
+trait PlanTest extends SparkFunSuite with PlanTestBase {
+}
--- End diff --

Please remove the useless `{` and `}`


---




[GitHub] spark pull request #19529: [SPARK-22308] Support alternative unit testing st...

2017-10-25 Thread gatorsmile
Github user gatorsmile commented on a diff in the pull request:

https://github.com/apache/spark/pull/19529#discussion_r14693
  
--- Diff: sql/core/src/test/scala/org/apache/spark/sql/test/SharedSQLContext.scala ---
@@ -17,86 +17,8 @@
 
 package org.apache.spark.sql.test
 
-import scala.concurrent.duration._
-
-import org.scalatest.BeforeAndAfterEach
-import org.scalatest.concurrent.Eventually
-
-import org.apache.spark.{DebugFilesystem, SparkConf}
-import org.apache.spark.sql.{SparkSession, SQLContext}
-import org.apache.spark.sql.internal.SQLConf
-
-/**
- * Helper trait for SQL test suites where all tests share a single [[TestSparkSession]].
- */
-trait SharedSQLContext extends SQLTestUtils with BeforeAndAfterEach with Eventually {
-
-  protected def sparkConf = {
-    new SparkConf()
-      .set("spark.hadoop.fs.file.impl", classOf[DebugFilesystem].getName)
-      .set("spark.unsafe.exceptionOnMemoryLeak", "true")
-      .set(SQLConf.CODEGEN_FALLBACK.key, "false")
-  }
-
-  /**
-   * The [[TestSparkSession]] to use for all tests in this suite.
-   *
-   * By default, the underlying [[org.apache.spark.SparkContext]] will be run in local
-   * mode with the default test configurations.
-   */
-  private var _spark: TestSparkSession = null
-
-  /**
-   * The [[TestSparkSession]] to use for all tests in this suite.
-   */
-  protected implicit def spark: SparkSession = _spark
-
-  /**
-   * The [[TestSQLContext]] to use for all tests in this suite.
-   */
-  protected implicit def sqlContext: SQLContext = _spark.sqlContext
-
-  protected def createSparkSession: TestSparkSession = {
-    new TestSparkSession(sparkConf)
-  }
-
-  /**
-   * Initialize the [[TestSparkSession]].
-   */
+trait SharedSQLContext extends SQLTestUtils with SharedSparkSession {
   protected override def beforeAll(): Unit = {
-    SparkSession.sqlListener.set(null)
-    if (_spark == null) {
-      _spark = createSparkSession
-    }
-    // Ensure we have initialized the context before calling parent code
     super.beforeAll()
--- End diff --

If this `beforeAll` is just calling `super.beforeAll`, why do we still need 
to override it? 
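
(For reference, an override that only delegates is a no-op under Scala's trait linearization; a minimal sketch, with a hypothetical trait name:)

```Scala
trait RedundantOverride extends SharedSparkSession {
  // Behaves exactly as if the override were deleted: linearization already
  // chains each mixed-in trait's beforeAll through super.
  protected override def beforeAll(): Unit = {
    super.beforeAll()
  }
}
```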


---




[GitHub] spark pull request #19529: [SPARK-22308] Support alternative unit testing st...

2017-10-25 Thread nkronenfeld
Github user nkronenfeld commented on a diff in the pull request:

https://github.com/apache/spark/pull/19529#discussion_r146887435
  
--- Diff: sql/core/src/test/scala/org/apache/spark/sql/test/SharedSQLContext.scala ---
@@ -17,86 +17,8 @@
 
 package org.apache.spark.sql.test
 
-import scala.concurrent.duration._
-
-import org.scalatest.BeforeAndAfterEach
-import org.scalatest.concurrent.Eventually
-
-import org.apache.spark.{DebugFilesystem, SparkConf}
-import org.apache.spark.sql.{SparkSession, SQLContext}
-import org.apache.spark.sql.internal.SQLConf
-
-/**
- * Helper trait for SQL test suites where all tests share a single [[TestSparkSession]].
- */
-trait SharedSQLContext extends SQLTestUtils with BeforeAndAfterEach with Eventually {
-
-  protected def sparkConf = {
-    new SparkConf()
-      .set("spark.hadoop.fs.file.impl", classOf[DebugFilesystem].getName)
-      .set("spark.unsafe.exceptionOnMemoryLeak", "true")
-      .set(SQLConf.CODEGEN_FALLBACK.key, "false")
-  }
-
-  /**
-   * The [[TestSparkSession]] to use for all tests in this suite.
-   *
-   * By default, the underlying [[org.apache.spark.SparkContext]] will be run in local
-   * mode with the default test configurations.
-   */
-  private var _spark: TestSparkSession = null
-
-  /**
-   * The [[TestSparkSession]] to use for all tests in this suite.
-   */
-  protected implicit def spark: SparkSession = _spark
-
-  /**
-   * The [[TestSQLContext]] to use for all tests in this suite.
-   */
-  protected implicit def sqlContext: SQLContext = _spark.sqlContext
-
-  protected def createSparkSession: TestSparkSession = {
-    new TestSparkSession(sparkConf)
-  }
-
-  /**
-   * Initialize the [[TestSparkSession]].
-   */
+trait SharedSQLContext extends SQLTestUtils with SharedSparkSession {
   protected override def beforeAll(): Unit = {
--- End diff --

It's still used throughout the unit tests.
I had noticed another issue related to that ([SPARK-15037]), but that was marked resolved without this being removed.
Note that SharedSQLContext is to SharedSparkSession as PlanTest is to PlanTestBase - it extends FunSuite.
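
The split being described looks roughly like this (a sketch; the "Like"-suffixed names are hypothetical stand-ins for the real traits):

```Scala
import org.scalatest.Suite

import org.apache.spark.SparkFunSuite

// Style-agnostic base: only requires that the mixer is some ScalaTest Suite.
trait PlanTestBaseLike { self: Suite =>
  // plan-comparison helpers would live here
}

// FunSuite-bound flavor, kept so existing internal suites compile unchanged.
trait PlanTestLike extends SparkFunSuite with PlanTestBaseLike
```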


---




[GitHub] spark pull request #19529: [SPARK-22308] Support alternative unit testing st...

2017-10-25 Thread nkronenfeld
Github user nkronenfeld commented on a diff in the pull request:

https://github.com/apache/spark/pull/19529#discussion_r146885975
  
--- Diff: core/src/test/scala/org/apache/spark/SharedSparkContext.scala ---
@@ -29,10 +29,25 @@ trait SharedSparkContext extends BeforeAndAfterAll with BeforeAndAfterEach { sel
 
   var conf = new SparkConf(false)
 
+  /**
+   * Initialize the [[SparkContext]].  Generally, this is just called from
--- End diff --

fixed here, not elsewhere yet


---




[GitHub] spark pull request #19529: [SPARK-22308] Support alternative unit testing st...

2017-10-25 Thread nkronenfeld
Github user nkronenfeld commented on a diff in the pull request:

https://github.com/apache/spark/pull/19529#discussion_r146885926
  
--- Diff: sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala ---
@@ -52,17 +55,142 @@ import org.apache.spark.util.{UninterruptibleThread, Utils}
  * Subclasses should *not* create `SQLContext`s in the test suite constructor, which is
  * prone to leaving multiple overlapping [[org.apache.spark.SparkContext]]s in the same JVM.
  */
-private[sql] trait SQLTestUtils
-  extends SparkFunSuite with Eventually
+private[sql] trait SQLTestUtils extends SparkFunSuite with SQLTestUtilsBase with PlanTest {
+  // Whether to materialize all test data before the first test is run
+  private var loadTestDataBeforeTests = false
+
+  protected override def beforeAll(): Unit = {
+    super.beforeAll()
+    if (loadTestDataBeforeTests) {
+      loadTestData()
+    }
+  }
+
+  /**
+   * Materialize the test data immediately after the `SQLContext` is set up.
+   * This is necessary if the data is accessed by name but not through direct reference.
+   */
+  protected def setupTestData(): Unit = {
+    loadTestDataBeforeTests = true
+  }
+
+  /**
+   * Disable stdout and stderr when running the test. To not output the logs to the console,
+   * ConsoleAppender's `follow` should be set to `true` so that it will honor reassignments of
+   * System.out or System.err. Otherwise, ConsoleAppender will still output to the console even if
+   * we change System.out and System.err.
+   */
+  protected def testQuietly(name: String)(f: => Unit): Unit = {
+    test(name) {
+      quietly {
+        f
+      }
+    }
+  }
+
+  /**
+   * Run a test on a separate `UninterruptibleThread`.
+   */
+  protected def testWithUninterruptibleThread(name: String, quietly: Boolean = false)
+    (body: => Unit): Unit = {
+    val timeoutMillis = 10000
+    @transient var ex: Throwable = null
+
+    def runOnThread(): Unit = {
+      val thread = new UninterruptibleThread(s"Testing thread for test $name") {
+        override def run(): Unit = {
+          try {
+            body
+          } catch {
+            case NonFatal(e) =>
+              ex = e
+          }
+        }
+      }
+      thread.setDaemon(true)
+      thread.start()
+      thread.join(timeoutMillis)
+      if (thread.isAlive) {
+        thread.interrupt()
+        // If this interrupt does not work, then this thread is most likely running something that
+        // is not interruptible. There is not much point to wait for the thread to terminate, and
+        // we rather let the JVM terminate the thread on exit.
+        fail(
+          s"Test '$name' running on o.a.s.util.UninterruptibleThread timed out after" +
+            s" $timeoutMillis ms")
+      } else if (ex != null) {
+        throw ex
+      }
+    }
+
+    if (quietly) {
+      testQuietly(name) { runOnThread() }
+    } else {
+      test(name) { runOnThread() }
+    }
+  }
+}
+
+private[sql] object SQLTestUtils {
--- End diff --

done


---




[GitHub] spark pull request #19529: [SPARK-22308] Support alternative unit testing st...

2017-10-24 Thread jiangxb1987
Github user jiangxb1987 commented on a diff in the pull request:

https://github.com/apache/spark/pull/19529#discussion_r146591649
  
--- Diff: core/src/test/scala/org/apache/spark/SharedSparkContext.scala ---
@@ -29,10 +29,25 @@ trait SharedSparkContext extends BeforeAndAfterAll with BeforeAndAfterEach { sel
 
   var conf = new SparkConf(false)
 
+  /**
+   * Initialize the [[SparkContext]].  Generally, this is just called from
--- End diff --

nit: The max length of each line in the comment should also be 100; it seems you are limiting it to 80?


---




[GitHub] spark pull request #19529: [SPARK-22308] Support alternative unit testing st...

2017-10-24 Thread jiangxb1987
Github user jiangxb1987 commented on a diff in the pull request:

https://github.com/apache/spark/pull/19529#discussion_r146595869
  
--- Diff: sql/core/src/test/scala/org/apache/spark/sql/test/SharedSQLContext.scala ---
@@ -17,86 +17,8 @@
 
 package org.apache.spark.sql.test
 
-import scala.concurrent.duration._
-
-import org.scalatest.BeforeAndAfterEach
-import org.scalatest.concurrent.Eventually
-
-import org.apache.spark.{DebugFilesystem, SparkConf}
-import org.apache.spark.sql.{SparkSession, SQLContext}
-import org.apache.spark.sql.internal.SQLConf
-
-/**
- * Helper trait for SQL test suites where all tests share a single [[TestSparkSession]].
- */
-trait SharedSQLContext extends SQLTestUtils with BeforeAndAfterEach with Eventually {
-
-  protected def sparkConf = {
-    new SparkConf()
-      .set("spark.hadoop.fs.file.impl", classOf[DebugFilesystem].getName)
-      .set("spark.unsafe.exceptionOnMemoryLeak", "true")
-      .set(SQLConf.CODEGEN_FALLBACK.key, "false")
-  }
-
-  /**
-   * The [[TestSparkSession]] to use for all tests in this suite.
-   *
-   * By default, the underlying [[org.apache.spark.SparkContext]] will be run in local
-   * mode with the default test configurations.
-   */
-  private var _spark: TestSparkSession = null
-
-  /**
-   * The [[TestSparkSession]] to use for all tests in this suite.
-   */
-  protected implicit def spark: SparkSession = _spark
-
-  /**
-   * The [[TestSQLContext]] to use for all tests in this suite.
-   */
-  protected implicit def sqlContext: SQLContext = _spark.sqlContext
-
-  protected def createSparkSession: TestSparkSession = {
-    new TestSparkSession(sparkConf)
-  }
-
-  /**
-   * Initialize the [[TestSparkSession]].
-   */
+trait SharedSQLContext extends SQLTestUtils with SharedSparkSession {
   protected override def beforeAll(): Unit = {
--- End diff --

Why do we still need this?


---




[GitHub] spark pull request #19529: [SPARK-22308] Support alternative unit testing st...

2017-10-24 Thread jiangxb1987
Github user jiangxb1987 commented on a diff in the pull request:

https://github.com/apache/spark/pull/19529#discussion_r146595345
  
--- Diff: sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala ---
@@ -52,17 +55,142 @@ import org.apache.spark.util.{UninterruptibleThread, Utils}
  * Subclasses should *not* create `SQLContext`s in the test suite constructor, which is
  * prone to leaving multiple overlapping [[org.apache.spark.SparkContext]]s in the same JVM.
  */
-private[sql] trait SQLTestUtils
-  extends SparkFunSuite with Eventually
+private[sql] trait SQLTestUtils extends SparkFunSuite with SQLTestUtilsBase with PlanTest {
+  // Whether to materialize all test data before the first test is run
+  private var loadTestDataBeforeTests = false
+
+  protected override def beforeAll(): Unit = {
+    super.beforeAll()
+    if (loadTestDataBeforeTests) {
+      loadTestData()
+    }
+  }
+
+  /**
+   * Materialize the test data immediately after the `SQLContext` is set up.
+   * This is necessary if the data is accessed by name but not through direct reference.
+   */
+  protected def setupTestData(): Unit = {
+    loadTestDataBeforeTests = true
+  }
+
+  /**
+   * Disable stdout and stderr when running the test. To not output the logs to the console,
+   * ConsoleAppender's `follow` should be set to `true` so that it will honor reassignments of
+   * System.out or System.err. Otherwise, ConsoleAppender will still output to the console even if
+   * we change System.out and System.err.
+   */
+  protected def testQuietly(name: String)(f: => Unit): Unit = {
+    test(name) {
+      quietly {
+        f
+      }
+    }
+  }
+
+  /**
+   * Run a test on a separate `UninterruptibleThread`.
+   */
+  protected def testWithUninterruptibleThread(name: String, quietly: Boolean = false)
+    (body: => Unit): Unit = {
+    val timeoutMillis = 10000
+    @transient var ex: Throwable = null
+
+    def runOnThread(): Unit = {
+      val thread = new UninterruptibleThread(s"Testing thread for test $name") {
+        override def run(): Unit = {
+          try {
+            body
+          } catch {
+            case NonFatal(e) =>
+              ex = e
+          }
+        }
+      }
+      thread.setDaemon(true)
+      thread.start()
+      thread.join(timeoutMillis)
+      if (thread.isAlive) {
+        thread.interrupt()
+        // If this interrupt does not work, then this thread is most likely running something that
+        // is not interruptible. There is not much point to wait for the thread to terminate, and
+        // we rather let the JVM terminate the thread on exit.
+        fail(
+          s"Test '$name' running on o.a.s.util.UninterruptibleThread timed out after" +
+            s" $timeoutMillis ms")
+      } else if (ex != null) {
+        throw ex
+      }
+    }
+
+    if (quietly) {
+      testQuietly(name) { runOnThread() }
+    } else {
+      test(name) { runOnThread() }
+    }
+  }
+}
+
+private[sql] object SQLTestUtils {
--- End diff --

Could you move this after `SQLTestUtilsBase` ?


---




[GitHub] spark pull request #19529: [SPARK-22308] Support alternative unit testing st...

2017-10-23 Thread nkronenfeld
Github user nkronenfeld commented on a diff in the pull request:

https://github.com/apache/spark/pull/19529#discussion_r146312166
  
--- Diff: sql/core/src/test/scala/org/apache/spark/sql/test/GenericFlatSpecSuite.scala ---
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.test
+
+import org.scalatest.FlatSpec
+
+/**
+ * The purpose of this suite is to make sure that generic FlatSpec-based scala
+ * tests work with a shared spark session
+ */
+class GenericFlatSpecSuite extends FlatSpec with SharedSparkSession {
--- End diff --

Yeah, I wasn't totally sure about that either, but I eventually came to the conclusion that they are useful. I think of it more in terms of preventing regressions; the case they will prevent is a FunSuite dependency creeping back into SharedSparkSession.

For similar reasons, I should probably put similar tests in for SharedSparkContext.
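
Such a guard for SharedSparkContext might look like this (a sketch; the suite name is hypothetical):

```Scala
import org.scalatest.FlatSpec

import org.apache.spark.SharedSparkContext

// If a FunSuite dependency ever creeps back into SharedSparkContext,
// this FlatSpec-based suite stops compiling.
class GenericFlatSpecContextSuite extends FlatSpec with SharedSparkContext {
  "A shared SparkContext" should "be usable from FlatSpec-style tests" in {
    assert(sc.parallelize(1 to 10).sum() === 55)
  }
}
```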


---




[GitHub] spark pull request #19529: [SPARK-22308] Support alternative unit testing st...

2017-10-23 Thread nkronenfeld
Github user nkronenfeld commented on a diff in the pull request:

https://github.com/apache/spark/pull/19529#discussion_r146311684
  
--- Diff: sql/core/src/test/scala/org/apache/spark/sql/test/SharedSparkSession.scala ---
@@ -0,0 +1,119 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.test
+
+import scala.concurrent.duration._
+
+import org.scalatest.{BeforeAndAfterEach, Suite}
+import org.scalatest.concurrent.Eventually
+
+import org.apache.spark.{DebugFilesystem, SparkConf}
+import org.apache.spark.sql.{SparkSession, SQLContext}
+import org.apache.spark.sql.internal.SQLConf
+
+/**
+ * Helper trait for SQL test suites where all tests share a single [[TestSparkSession]].
+ */
+trait SharedSparkSession
--- End diff --

I'm not sure what you're asking.

It's specific to the Spark SQL package - in that SparkSession resides there - but not specific to using the SQL language. When I say 'SQL test suites', I'm referring to the former.

---




[GitHub] spark pull request #19529: [SPARK-22308] Support alternative unit testing st...

2017-10-23 Thread nkronenfeld
Github user nkronenfeld commented on a diff in the pull request:

https://github.com/apache/spark/pull/19529#discussion_r146311180
  
--- Diff: sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala ---
@@ -52,249 +36,23 @@ import org.apache.spark.util.{UninterruptibleThread, Utils}
  * Subclasses should *not* create `SQLContext`s in the test suite constructor, which is
  * prone to leaving multiple overlapping [[org.apache.spark.SparkContext]]s in the same JVM.
  */
-private[sql] trait SQLTestUtils
-  extends SparkFunSuite with Eventually
-  with BeforeAndAfterAll
-  with SQLTestData
-  with PlanTest { self =>
-
-  protected def sparkContext = spark.sparkContext
-
+private[sql] trait SQLTestUtils extends SparkFunSuite with SQLTestUtilsBase with PlanTest {
--- End diff --

This has more changes - I left the data initialization stuff in 
SQLTestUtils, as it wouldn't be needed by external tests.  All the <'s below 
are things that I pulled out of SQLTestUtilsBase and put back into SQLTestUtils

Running similarly:

git checkout 4a779bdac3e75c17b7d36c5a009ba6c948fa9fb6 SQLTestUtils.scala
diff SQLTestUtils.scala SQLTestUtilsBase.scala

I get

27d26
< import scala.util.control.NonFatal
30c29
< import org.scalatest.BeforeAndAfterAll
---
> import org.scalatest.{BeforeAndAfterAll, Suite}
33d31
< import org.apache.spark.SparkFunSuite
38c36
< import org.apache.spark.sql.catalyst.plans.PlanTest
---
> import org.apache.spark.sql.catalyst.plans.PlanTestBase
40d37
< import org.apache.spark.sql.catalyst.util._
43c40
< import org.apache.spark.util.{UninterruptibleThread, Utils}
---
> import org.apache.spark.util.Utils
46c43
<  * Helper trait that should be extended by all SQL test suites.
---
>  * Helper trait that can be extended by all external SQL test suites.
48,49c45
<  * This allows subclasses to plugin a custom `SQLContext`. It comes with test data
<  * prepared in advance as well as all implicit conversions used extensively by dataframes.
---
>  * This allows subclasses to plugin a custom `SQLContext`.
55,56c51,52
< private[sql] trait SQLTestUtils
<   extends SparkFunSuite with Eventually
---
> private[sql] trait SQLTestUtilsBase
>   extends Eventually
59c55
<   with PlanTest { self =>
---
>   with PlanTestBase { self: Suite =>
63,65d58
<   // Whether to materialize all test data before the first test is run
<   private var loadTestDataBeforeTests = false
< 
80,94d72
<   /**
<    * Materialize the test data immediately after the `SQLContext` is set up.
<    * This is necessary if the data is accessed by name but not through direct reference.
<    */
<   protected def setupTestData(): Unit = {
<     loadTestDataBeforeTests = true
<   }
< 
<   protected override def beforeAll(): Unit = {
<     super.beforeAll()
<     if (loadTestDataBeforeTests) {
<       loadTestData()
<     }
<   }
< 
300,354d277
<   /**
<    * Disable stdout and stderr when running the test. To not output the logs to the console,
<    * ConsoleAppender's `follow` should be set to `true` so that it will honor reassignments of
<    * System.out or System.err. Otherwise, ConsoleAppender will still output to the console even if
<    * we change System.out and System.err.
<    */
<   protected def testQuietly(name: String)(f: => Unit): Unit = {
<     test(name) {
<       quietly {
<         f
<       }
<     }
<   }
< 
<   /**
<    * Run a test on a separate `UninterruptibleThread`.
<    */
<   protected def testWithUninterruptibleThread(name: String, quietly: Boolean = false)
<     (body: => Unit): Unit = {
<     val timeoutMillis = 10000
<     @transient var ex: Throwable = null
< 
<     def runOnThread(): Unit = {
<       val thread = new UninterruptibleThread(s"Testing thread for test $name") {
<         override def run(): Unit = {
<           try {
<             body
<           } catch {
<             case NonFatal(e) =>
<               ex = e
<           }
<         }
<       }
<       thread.setDaemon(true)
<       thread.start()
<       thread.join(timeoutMillis)
<       if (thread.isAlive) {
<         thread.interrupt()
<         // If this i

[GitHub] spark pull request #19529: [SPARK-22308] Support alternative unit testing st...

2017-10-23 Thread nkronenfeld
Github user nkronenfeld commented on a diff in the pull request:

https://github.com/apache/spark/pull/19529#discussion_r146308234
  
--- Diff: sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/PlanTest.scala ---
@@ -18,158 +18,9 @@
 package org.apache.spark.sql.catalyst.plans
 
 import org.apache.spark.SparkFunSuite
-import org.apache.spark.sql.AnalysisException
-import org.apache.spark.sql.catalyst.analysis.SimpleAnalyzer
-import org.apache.spark.sql.catalyst.expressions._
-import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression
-import org.apache.spark.sql.catalyst.plans.logical._
-import org.apache.spark.sql.catalyst.util._
-import org.apache.spark.sql.internal.SQLConf
 
 /**
  * Provides helper methods for comparing plans.
  */
-trait PlanTest extends SparkFunSuite with PredicateHelper {
-
-  // TODO(gatorsmile): remove this from PlanTest and all the analyzer rules
-  protected def conf = SQLConf.get
-
-  /**
-   * Since attribute references are given globally unique ids during analysis,
-   * we must normalize them to check if two different queries are identical.
-   */
-  protected def normalizeExprIds(plan: LogicalPlan) = {
-    plan transformAllExpressions {
-      case s: ScalarSubquery =>
-        s.copy(exprId = ExprId(0))
-      case e: Exists =>
-        e.copy(exprId = ExprId(0))
-      case l: ListQuery =>
-        l.copy(exprId = ExprId(0))
-      case a: AttributeReference =>
-        AttributeReference(a.name, a.dataType, a.nullable)(exprId = ExprId(0))
-      case a: Alias =>
-        Alias(a.child, a.name)(exprId = ExprId(0))
-      case ae: AggregateExpression =>
-        ae.copy(resultId = ExprId(0))
-    }
-  }
-
-  /**
-   * Normalizes plans:
-   * - Filter the filter conditions that appear in a plan. For instance,
-   *   ((expr 1 && expr 2) && expr 3), (expr 1 && expr 2 && expr 3), (expr 3 && (expr 1 && expr 2)
-   *   etc., will all now be equivalent.
-   * - Sample the seed will be replaced by 0L.
-   * - Join conditions will be resorted by hashCode.
-   */
-  protected def normalizePlan(plan: LogicalPlan): LogicalPlan = {
-    plan transform {
-      case Filter(condition: Expression, child: LogicalPlan) =>
-        Filter(splitConjunctivePredicates(condition).map(rewriteEqual).sortBy(_.hashCode())
-          .reduce(And), child)
-      case sample: Sample =>
-        sample.copy(seed = 0L)
-      case Join(left, right, joinType, condition) if condition.isDefined =>
-        val newCondition =
-          splitConjunctivePredicates(condition.get).map(rewriteEqual).sortBy(_.hashCode())
-            .reduce(And)
-        Join(left, right, joinType, Some(newCondition))
-    }
-  }
-
-  /**
-   * Rewrite [[EqualTo]] and [[EqualNullSafe]] operator to keep order. The following cases will be
-   * equivalent:
-   * 1. (a = b), (b = a);
-   * 2. (a <=> b), (b <=> a).
-   */
-  private def rewriteEqual(condition: Expression): Expression = condition match {
-    case eq @ EqualTo(l: Expression, r: Expression) =>
-      Seq(l, r).sortBy(_.hashCode()).reduce(EqualTo)
-    case eq @ EqualNullSafe(l: Expression, r: Expression) =>
-      Seq(l, r).sortBy(_.hashCode()).reduce(EqualNullSafe)
-    case _ => condition // Don't reorder.
-  }
-
-  /** Fails the test if the two plans do not match */
-  protected def comparePlans(
-      plan1: LogicalPlan,
-      plan2: LogicalPlan,
-      checkAnalysis: Boolean = true): Unit = {
-    if (checkAnalysis) {
-      // Make sure both plan pass checkAnalysis.
-      SimpleAnalyzer.checkAnalysis(plan1)
-      SimpleAnalyzer.checkAnalysis(plan2)
-    }
-
-    val normalized1 = normalizePlan(normalizeExprIds(plan1))
-    val normalized2 = normalizePlan(normalizeExprIds(plan2))
-    if (normalized1 != normalized2) {
-      fail(
-        s"""
-          |== FAIL: Plans do not match ===
-          |${sideBySide(normalized1.treeString, normalized2.treeString).mkString("\n")}
-         """.stripMargin)
-    }
-  }
-
-  /** Fails the test if the two expressions do not match */
-  protected def compareExpressions(e1: Expression, e2: Expression): Unit = {
-    comparePlans(Filter(e1, OneRowRelation()), Filter(e2, OneRowRelation()), checkAnalysis = false)
-  }
-
-  /** Fails the test if the join order in the two plans do not match */
-  protected def compareJoinOrder(plan1: LogicalPlan, plan2: LogicalPlan) {
-    val normalized1 = normalizePlan(normalizeExprIds(plan1))
-    val normalized2 = normalizePlan(normalizeExprIds(plan2))
-    if (!sameJoinPlan(normalized1, normalized2))

[GitHub] spark pull request #19529: [SPARK-22308] Support alternative unit testing st...

2017-10-23 Thread srowen
Github user srowen commented on a diff in the pull request:

https://github.com/apache/spark/pull/19529#discussion_r146186223
  
--- Diff: sql/core/src/test/scala/org/apache/spark/sql/test/SharedSparkSession.scala ---
@@ -0,0 +1,119 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.test
+
+import scala.concurrent.duration._
+
+import org.scalatest.{BeforeAndAfterEach, Suite}
+import org.scalatest.concurrent.Eventually
+
+import org.apache.spark.{DebugFilesystem, SparkConf}
+import org.apache.spark.sql.{SparkSession, SQLContext}
+import org.apache.spark.sql.internal.SQLConf
+
+/**
+ * Helper trait for SQL test suites where all tests share a single [[TestSparkSession]].
+ */
+trait SharedSparkSession
--- End diff --

Is this actually specific to SQL?
And is it really for symmetry with SharedSparkContext?


---




[GitHub] spark pull request #19529: [SPARK-22308] Support alternative unit testing st...

2017-10-23 Thread srowen
Github user srowen commented on a diff in the pull request:

https://github.com/apache/spark/pull/19529#discussion_r146185400
  
--- Diff: sql/core/src/test/scala/org/apache/spark/sql/test/GenericFlatSpecSuite.scala ---
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.test
+
+import org.scalatest.FlatSpec
+
+/**
+ * The purpose of this suite is to make sure that generic FlatSpec-based scala
+ * tests work with a shared spark session
+ */
+class GenericFlatSpecSuite extends FlatSpec with SharedSparkSession {
--- End diff --

These are testing tests? I get it, but I don't know if it's necessary. If 
FlatSpec didn't work, scalatest would hopefully catch it. If initializeSession 
/ SharedSparkSession didn't, presumably tons of tests wouldn't work.


---




[GitHub] spark pull request #19529: [SPARK-22308] Support alternative unit testing st...

2017-10-23 Thread srowen
Github user srowen commented on a diff in the pull request:

https://github.com/apache/spark/pull/19529#discussion_r146185476
  
--- Diff: sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala ---
@@ -52,249 +36,23 @@ import org.apache.spark.util.{UninterruptibleThread, Utils}
  * Subclasses should *not* create `SQLContext`s in the test suite constructor, which is
  * prone to leaving multiple overlapping [[org.apache.spark.SparkContext]]s in the same JVM.
  */
-private[sql] trait SQLTestUtils
-  extends SparkFunSuite with Eventually
-  with BeforeAndAfterAll
-  with SQLTestData
-  with PlanTest { self =>
-
-  protected def sparkContext = spark.sparkContext
-
+private[sql] trait SQLTestUtils extends SparkFunSuite with SQLTestUtilsBase with PlanTest {
--- End diff --

Same general comment as PlanTestBase vs PlanTest above


---




[GitHub] spark pull request #19529: [SPARK-22308] Support alternative unit testing st...

2017-10-23 Thread srowen
Github user srowen commented on a diff in the pull request:

https://github.com/apache/spark/pull/19529#discussion_r146185226
  
--- Diff: sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/plans/PlanTest.scala ---
@@ -18,158 +18,9 @@
 package org.apache.spark.sql.catalyst.plans
 
 import org.apache.spark.SparkFunSuite
-import org.apache.spark.sql.AnalysisException
-import org.apache.spark.sql.catalyst.analysis.SimpleAnalyzer
-import org.apache.spark.sql.catalyst.expressions._
-import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression
-import org.apache.spark.sql.catalyst.plans.logical._
-import org.apache.spark.sql.catalyst.util._
-import org.apache.spark.sql.internal.SQLConf
 
 /**
  * Provides helper methods for comparing plans.
  */
-trait PlanTest extends SparkFunSuite with PredicateHelper {
-
-  // TODO(gatorsmile): remove this from PlanTest and all the analyzer rules
-  protected def conf = SQLConf.get
-
-  /**
-   * Since attribute references are given globally unique ids during analysis,
-   * we must normalize them to check if two different queries are identical.
-   */
-  protected def normalizeExprIds(plan: LogicalPlan) = {
-    plan transformAllExpressions {
-      case s: ScalarSubquery =>
-        s.copy(exprId = ExprId(0))
-      case e: Exists =>
-        e.copy(exprId = ExprId(0))
-      case l: ListQuery =>
-        l.copy(exprId = ExprId(0))
-      case a: AttributeReference =>
-        AttributeReference(a.name, a.dataType, a.nullable)(exprId = ExprId(0))
-      case a: Alias =>
-        Alias(a.child, a.name)(exprId = ExprId(0))
-      case ae: AggregateExpression =>
-        ae.copy(resultId = ExprId(0))
-    }
-  }
-
-  /**
-   * Normalizes plans:
-   * - Filter the filter conditions that appear in a plan. For instance,
-   *   ((expr 1 && expr 2) && expr 3), (expr 1 && expr 2 && expr 3), (expr 3 && (expr 1 && expr 2)
-   *   etc., will all now be equivalent.
-   * - Sample the seed will be replaced by 0L.
-   * - Join conditions will be resorted by hashCode.
-   */
-  protected def normalizePlan(plan: LogicalPlan): LogicalPlan = {
-    plan transform {
-      case Filter(condition: Expression, child: LogicalPlan) =>
-        Filter(splitConjunctivePredicates(condition).map(rewriteEqual).sortBy(_.hashCode())
-          .reduce(And), child)
-      case sample: Sample =>
-        sample.copy(seed = 0L)
-      case Join(left, right, joinType, condition) if condition.isDefined =>
-        val newCondition =
-          splitConjunctivePredicates(condition.get).map(rewriteEqual).sortBy(_.hashCode())
-            .reduce(And)
-        Join(left, right, joinType, Some(newCondition))
-    }
-  }
-
-  /**
-   * Rewrite [[EqualTo]] and [[EqualNullSafe]] operator to keep order. The following cases will be
-   * equivalent:
-   * 1. (a = b), (b = a);
-   * 2. (a <=> b), (b <=> a).
-   */
-  private def rewriteEqual(condition: Expression): Expression = condition match {
-    case eq @ EqualTo(l: Expression, r: Expression) =>
-      Seq(l, r).sortBy(_.hashCode()).reduce(EqualTo)
-    case eq @ EqualNullSafe(l: Expression, r: Expression) =>
-      Seq(l, r).sortBy(_.hashCode()).reduce(EqualNullSafe)
-    case _ => condition // Don't reorder.
-  }
-
-  /** Fails the test if the two plans do not match */
-  protected def comparePlans(
-      plan1: LogicalPlan,
-      plan2: LogicalPlan,
-      checkAnalysis: Boolean = true): Unit = {
-    if (checkAnalysis) {
-      // Make sure both plan pass checkAnalysis.
-      SimpleAnalyzer.checkAnalysis(plan1)
-      SimpleAnalyzer.checkAnalysis(plan2)
-    }
-
-    val normalized1 = normalizePlan(normalizeExprIds(plan1))
-    val normalized2 = normalizePlan(normalizeExprIds(plan2))
-    if (normalized1 != normalized2) {
-      fail(
-        s"""
-          |== FAIL: Plans do not match ===
-          |${sideBySide(normalized1.treeString, normalized2.treeString).mkString("\n")}
-         """.stripMargin)
-    }
-  }
-
-  /** Fails the test if the two expressions do not match */
-  protected def compareExpressions(e1: Expression, e2: Expression): Unit = {
-    comparePlans(Filter(e1, OneRowRelation()), Filter(e2, OneRowRelation()), checkAnalysis = false)
-  }
-
-  /** Fails the test if the join order in the two plans do not match */
-  protected def compareJoinOrder(plan1: LogicalPlan, plan2: LogicalPlan) {
-    val normalized1 = normalizePlan(normalizeExprIds(plan1))
-    val normalized2 = normalizePlan(normalizeExprIds(plan2))
-    if (!sameJoinPlan(normalized1, normalized2)) {