This is an automated email from the ASF dual-hosted git repository. srowen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 5172190 [SPARK-27392][SQL] TestHive test tables should be placed in shared test state, not per session 5172190 is described below commit 5172190da19a2750e00e1eac00cebfbca1f3c173 Author: Eric Liang <e...@databricks.com> AuthorDate: Mon Apr 22 11:05:31 2019 -0700 [SPARK-27392][SQL] TestHive test tables should be placed in shared test state, not per session ## What changes were proposed in this pull request? Previously, TestHive tracked the set of loaded test tables per session. As a result, tests that use tables from multiple sessions will run into issues if they access the same table. The correct location for this set is the shared state. This change also includes a couple of other minor test improvements. cc gatorsmile srinathshankar ## How was this patch tested? Existing unit tests. Closes #24302 from ericl/test-conflicts. Lead-authored-by: Eric Liang <e...@databricks.com> Co-authored-by: Eric Liang <ekhli...@gmail.com> Signed-off-by: Sean Owen <sean.o...@databricks.com> --- .../spark/mllib/regression/JavaRidgeRegressionSuite.java | 7 +++++-- .../scala/org/apache/spark/sql/hive/test/TestHive.scala | 14 ++++++++------ 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/mllib/src/test/java/org/apache/spark/mllib/regression/JavaRidgeRegressionSuite.java b/mllib/src/test/java/org/apache/spark/mllib/regression/JavaRidgeRegressionSuite.java index cb00977..fb6c775 100644 --- a/mllib/src/test/java/org/apache/spark/mllib/regression/JavaRidgeRegressionSuite.java +++ b/mllib/src/test/java/org/apache/spark/mllib/regression/JavaRidgeRegressionSuite.java @@ -17,6 +17,7 @@ package org.apache.spark.mllib.regression; +import java.util.ArrayList; import java.util.List; import java.util.Random; @@ -55,7 +56,8 @@ public class JavaRidgeRegressionSuite extends SharedSparkSession { int numFeatures = 20; List<LabeledPoint> data = generateRidgeData(2 * numExamples, numFeatures, 10.0); - JavaRDD<LabeledPoint> testRDD = jsc.parallelize(data.subList(0, numExamples)); + JavaRDD<LabeledPoint> testRDD = jsc.parallelize( + new 
ArrayList<LabeledPoint>(data.subList(0, numExamples))); List<LabeledPoint> validationData = data.subList(numExamples, 2 * numExamples); RidgeRegressionWithSGD ridgeSGDImpl = new RidgeRegressionWithSGD(); @@ -79,7 +81,8 @@ public class JavaRidgeRegressionSuite extends SharedSparkSession { int numFeatures = 20; List<LabeledPoint> data = generateRidgeData(2 * numExamples, numFeatures, 10.0); - JavaRDD<LabeledPoint> testRDD = jsc.parallelize(data.subList(0, numExamples)); + JavaRDD<LabeledPoint> testRDD = jsc.parallelize( + new ArrayList<LabeledPoint>(data.subList(0, numExamples))); List<LabeledPoint> validationData = data.subList(numExamples, 2 * numExamples); RidgeRegressionModel model = RidgeRegressionWithSGD.train(testRDD.rdd(), 200, 1.0, 0.0); diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala index 1515807..e8a749f 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala @@ -92,6 +92,10 @@ private[hive] class TestHiveSharedState( hiveClient: Option[HiveClient] = None) extends SharedState(sc, initialConfigs = Map.empty[String, String]) { + // The set of loaded tables should be kept in shared state, since there may be multiple sessions + // created that want to use the same tables. 
+ val loadedTables = new collection.mutable.HashSet[String] + override lazy val externalCatalog: ExternalCatalogWithListener = { new ExternalCatalogWithListener(new TestHiveExternalCatalog( sc.conf, @@ -491,14 +495,12 @@ private[hive] class TestHiveSparkSession( hiveQTestUtilTables.foreach(registerTestTable) } - private val loadedTables = new collection.mutable.HashSet[String] - - def getLoadedTables: collection.mutable.HashSet[String] = loadedTables + def getLoadedTables: collection.mutable.HashSet[String] = sharedState.loadedTables def loadTestTable(name: String) { - if (!(loadedTables contains name)) { + if (!sharedState.loadedTables.contains(name)) { // Marks the table as loaded first to prevent infinite mutually recursive table loading. - loadedTables += name + sharedState.loadedTables += name logDebug(s"Loading test table $name") val createCmds = testTables.get(name).map(_.commands).getOrElse(sys.error(s"Unknown test table $name")) @@ -545,7 +547,7 @@ private[hive] class TestHiveSparkSession( warehouseDir.mkdir() sharedState.cacheManager.clearCache() - loadedTables.clear() + sharedState.loadedTables.clear() sessionState.catalog.reset() metadataHive.reset() --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org