This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 24180c0 [SPARK-27831][SQL][TEST][test-hadoop3.2] Move Hive test jars to maven dependency 24180c0 is described below commit 24180c00e0d2a7c907e5408fc5f5112cb2889681 Author: Yuming Wang <yumw...@ebay.com> AuthorDate: Fri May 24 10:33:34 2019 -0700 [SPARK-27831][SQL][TEST][test-hadoop3.2] Move Hive test jars to maven dependency ## What changes were proposed in this pull request? This pr moves Hive test jars(`hive-contrib-0.13.1.jar`, `hive-hcatalog-core-0.13.1.jar`, `hive-contrib-2.3.5.jar` and `hive-hcatalog-core-2.3.5.jar`) to maven dependency. ## How was this patch tested? Existing test Closes #24695 from wangyum/SPARK-27831. Authored-by: Yuming Wang <yumw...@ebay.com> Signed-off-by: Dongjoon Hyun <dh...@apple.com> --- pom.xml | 69 +++++++++++++++++++++ sql/hive-thriftserver/pom.xml | 7 +++ .../spark/sql/hive/thriftserver/CliSuite.scala | 6 +- .../thriftserver/HiveThriftServer2Suites.scala | 6 +- sql/hive/pom.xml | 8 +++ .../src/test/resources/hive-contrib-0.13.1.jar | Bin 114878 -> 0 bytes sql/hive/src/test/resources/hive-contrib-2.3.5.jar | Bin 125843 -> 0 bytes .../test/resources/hive-hcatalog-core-0.13.1.jar | Bin 468533 -> 0 bytes .../test/resources/hive-hcatalog-core-2.3.5.jar | Bin 263956 -> 0 bytes .../spark/sql/hive/HiveSparkSubmitSuite.scala | 14 ++--- .../spark/sql/hive/execution/HiveQuerySuite.scala | 8 +-- .../spark/sql/hive/execution/SQLQuerySuite.scala | 4 +- .../apache/spark/sql/hive/test/HiveTestUtils.scala | 32 ++++++++++ .../org/apache/spark/sql/hive/test/TestHive.scala | 13 ---- 14 files changed, 133 insertions(+), 34 deletions(-) diff --git a/pom.xml b/pom.xml index 00d1374..b0433cb 100644 --- a/pom.xml +++ b/pom.xml @@ -1937,6 +1937,75 @@ </dependency> <dependency> + <groupId>${hive.group}</groupId> + <artifactId>hive-contrib</artifactId> + <version>${hive.version}</version> + <scope>test</scope> + <exclusions> + <exclusion> + <groupId>${hive.group}</groupId> + <artifactId>hive-exec</artifactId> + </exclusion> + <exclusion> + <groupId>${hive.group}</groupId> + <artifactId>hive-serde</artifactId> + </exclusion> + <exclusion> + <groupId>${hive.group}</groupId> + <artifactId>hive-shims</artifactId> + </exclusion> + <exclusion> + <groupId>commons-codec</groupId> + <artifactId>commons-codec</artifactId> + </exclusion> + <exclusion> + <groupId>org.slf4j</groupId> + <artifactId>slf4j-api</artifactId> + </exclusion> + </exclusions> + </dependency> + <dependency> + <groupId>${hive.group}.hcatalog</groupId> + <artifactId>hive-hcatalog-core</artifactId> + <version>${hive.version}</version> + <scope>test</scope> + <exclusions> + <exclusion> + <groupId>${hive.group}</groupId> + <artifactId>hive-exec</artifactId> + </exclusion> + <exclusion> + <groupId>${hive.group}</groupId> + <artifactId>hive-metastore</artifactId> + </exclusion> + <exclusion> + <groupId>${hive.group}</groupId> + <artifactId>hive-cli</artifactId> + </exclusion> + <exclusion> + <groupId>${hive.group}</groupId> + <artifactId>hive-common</artifactId> + </exclusion> + <exclusion> + <groupId>com.google.guava</groupId> + <artifactId>guava</artifactId> + </exclusion> + <exclusion> + <groupId>org.slf4j</groupId> + <artifactId>slf4j-api</artifactId> + </exclusion> + <exclusion> + <groupId>org.codehaus.jackson</groupId> + <artifactId>jackson-mapper-asl</artifactId> + </exclusion> + <exclusion> + <groupId>org.apache.hadoop</groupId> + <artifactId>*</artifactId> + </exclusion> + </exclusions> + </dependency> + + <dependency> <groupId>org.apache.orc</groupId> <artifactId>orc-core</artifactId> <version>${orc.version}</version> diff --git a/sql/hive-thriftserver/pom.xml b/sql/hive-thriftserver/pom.xml index 70d244e..aa4fbf7 100644 --- a/sql/hive-thriftserver/pom.xml +++ b/sql/hive-thriftserver/pom.xml @@ -42,6 +42,13 @@ </dependency> <dependency> <groupId>org.apache.spark</groupId> + <artifactId>spark-hive_${scala.binary.version}</artifactId> + <version>${project.version}</version> + <type>test-jar</type> + <scope>test</scope> + </dependency> + <dependency> + <groupId>org.apache.spark</groupId> <artifactId>spark-core_${scala.binary.version}</artifactId> <version>${project.version}</version> <type>test-jar</type> diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala index 933fd73..4ccc17c 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala @@ -31,6 +31,7 @@ import org.scalatest.BeforeAndAfterAll import org.apache.spark.SparkFunSuite import org.apache.spark.internal.Logging +import org.apache.spark.sql.hive.test.HiveTestUtils import org.apache.spark.sql.test.ProcessTestUtils.ProcessOutputCapturer import org.apache.spark.util.{ThreadUtils, Utils} @@ -200,10 +201,7 @@ class CliSuite extends SparkFunSuite with BeforeAndAfterAll with Logging { } test("Commands using SerDe provided in --jars") { - val jarFile = - "../hive/src/test/resources/hive-hcatalog-core-0.13.1.jar" - .split("/") - .mkString(File.separator) + val jarFile = HiveTestUtils.getHiveHcatalogCoreJar.getCanonicalPath val dataFilePath = Thread.currentThread().getContextClassLoader.getResource("data/files/small_kv.txt") diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala index ef7c500..ee1f91a 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suites.scala @@ -44,6 +44,7 @@ import org.scalatest.BeforeAndAfterAll import org.apache.spark.{SparkException, SparkFunSuite} import org.apache.spark.internal.Logging import org.apache.spark.sql.hive.HiveUtils +import org.apache.spark.sql.hive.test.HiveTestUtils import org.apache.spark.sql.test.ProcessTestUtils.ProcessOutputCapturer import org.apache.spark.util.{ThreadUtils, Utils} @@ -484,10 +485,7 @@ class HiveThriftBinaryServerSuite extends HiveThriftJdbcTest { withMultipleConnectionJdbcStatement("smallKV", "addJar")( { statement => - val jarFile = - "../hive/src/test/resources/hive-hcatalog-core-0.13.1.jar" - .split("/") - .mkString(File.separator) + val jarFile = HiveTestUtils.getHiveHcatalogCoreJar.getCanonicalPath statement.executeQuery(s"ADD JAR $jarFile") }, diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml index f627227..d37f0c8 100644 --- a/sql/hive/pom.xml +++ b/sql/hive/pom.xml @@ -103,6 +103,14 @@ <groupId>${hive.group}</groupId> <artifactId>hive-metastore</artifactId> </dependency> + <dependency> + <groupId>${hive.group}</groupId> + <artifactId>hive-contrib</artifactId> + </dependency> + <dependency> + <groupId>${hive.group}.hcatalog</groupId> + <artifactId>hive-hcatalog-core</artifactId> + </dependency> <!-- <dependency> <groupId>${hive.group}</groupId> diff --git a/sql/hive/src/test/resources/hive-contrib-0.13.1.jar b/sql/hive/src/test/resources/hive-contrib-0.13.1.jar deleted file mode 100644 index ce0740d..0000000 Binary files a/sql/hive/src/test/resources/hive-contrib-0.13.1.jar and /dev/null differ diff --git a/sql/hive/src/test/resources/hive-contrib-2.3.5.jar b/sql/hive/src/test/resources/hive-contrib-2.3.5.jar deleted file mode 100644 index 9549fc2..0000000 Binary files a/sql/hive/src/test/resources/hive-contrib-2.3.5.jar and /dev/null differ diff --git a/sql/hive/src/test/resources/hive-hcatalog-core-0.13.1.jar b/sql/hive/src/test/resources/hive-hcatalog-core-0.13.1.jar deleted file mode 100644 index 37af9aa..0000000 Binary files a/sql/hive/src/test/resources/hive-hcatalog-core-0.13.1.jar and /dev/null differ diff --git a/sql/hive/src/test/resources/hive-hcatalog-core-2.3.5.jar b/sql/hive/src/test/resources/hive-hcatalog-core-2.3.5.jar deleted file mode 100644 index 0fb689f..0000000 Binary files a/sql/hive/src/test/resources/hive-hcatalog-core-2.3.5.jar and /dev/null differ diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala index 3b64a6b..9961a98 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala @@ -32,7 +32,7 @@ import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier} import org.apache.spark.sql.catalyst.catalog._ import org.apache.spark.sql.execution.command.DDLUtils import org.apache.spark.sql.expressions.Window -import org.apache.spark.sql.hive.test.{TestHive, TestHiveContext} +import org.apache.spark.sql.hive.test.{HiveTestUtils, TestHive, TestHiveContext} import org.apache.spark.sql.types.{DecimalType, StructType} import org.apache.spark.tags.ExtendedHiveTest import org.apache.spark.util.{ResetSystemProperties, Utils} @@ -108,8 +108,8 @@ class HiveSparkSubmitSuite val unusedJar = TestUtils.createJarWithClasses(Seq.empty) val jar1 = TestUtils.createJarWithClasses(Seq("SparkSubmitClassA")) val jar2 = TestUtils.createJarWithClasses(Seq("SparkSubmitClassB")) - val jar3 = TestHive.getHiveContribJar().getCanonicalPath - val jar4 = TestHive.getHiveHcatalogCoreJar().getCanonicalPath + val jar3 = HiveTestUtils.getHiveContribJar.getCanonicalPath + val jar4 = HiveTestUtils.getHiveHcatalogCoreJar.getCanonicalPath val jarsString = Seq(jar1, jar2, jar3, jar4).map(j => j.toString).mkString(",") val args = Seq( "--class", SparkSubmitClassLoaderTest.getClass.getName.stripSuffix("$"), @@ -315,7 +315,7 @@ class HiveSparkSubmitSuite "--master", "local-cluster[2,1,1024]", "--conf", "spark.ui.enabled=false", "--conf", "spark.master.rest.enabled=false", - "--jars", TestHive.getHiveContribJar().getCanonicalPath, + "--jars", HiveTestUtils.getHiveContribJar.getCanonicalPath, unusedJar.toString) runSparkSubmit(argsForCreateTable) @@ -457,7 +457,7 @@ object TemporaryHiveUDFTest extends Logging { // Load a Hive UDF from the jar. logInfo("Registering a temporary Hive UDF provided in a jar.") - val jar = hiveContext.getHiveContribJar().getCanonicalPath + val jar = HiveTestUtils.getHiveContribJar.getCanonicalPath hiveContext.sql( s""" |CREATE TEMPORARY FUNCTION example_max @@ -495,7 +495,7 @@ object PermanentHiveUDFTest1 extends Logging { // Load a Hive UDF from the jar. logInfo("Registering a permanent Hive UDF provided in a jar.") - val jar = hiveContext.getHiveContribJar().getCanonicalPath + val jar = HiveTestUtils.getHiveContribJar.getCanonicalPath hiveContext.sql( s""" |CREATE FUNCTION example_max @@ -532,7 +532,7 @@ object PermanentHiveUDFTest2 extends Logging { val hiveContext = new TestHiveContext(sc) // Load a Hive UDF from the jar. logInfo("Write the metadata of a permanent Hive UDF into metastore.") - val jar = hiveContext.getHiveContribJar().getCanonicalPath + val jar = HiveTestUtils.getHiveContribJar.getCanonicalPath val function = CatalogFunction( FunctionIdentifier("example_max"), "org.apache.hadoop.hive.contrib.udaf.example.UDAFExampleMax", diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala index c35ff80..df2f693 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala @@ -34,7 +34,7 @@ import org.apache.spark.sql.catalyst.parser.ParseException import org.apache.spark.sql.catalyst.plans.logical.Project import org.apache.spark.sql.execution.joins.BroadcastNestedLoopJoinExec import org.apache.spark.sql.hive._ -import org.apache.spark.sql.hive.test.TestHive +import org.apache.spark.sql.hive.test.{HiveTestUtils, TestHive} import org.apache.spark.sql.hive.test.TestHive._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SQLTestUtils @@ -816,7 +816,7 @@ class HiveQuerySuite extends HiveComparisonTest with SQLTestUtils with BeforeAnd test("ADD JAR command 2") { // this is a test case from mapjoin_addjar.q - val testJar = TestHive.getHiveHcatalogCoreJar().toURI + val testJar = HiveTestUtils.getHiveHcatalogCoreJar.toURI val testData = TestHive.getHiveFile("data/files/sample.json").toURI sql(s"ADD JAR $testJar") sql( @@ -826,9 +826,9 @@ class HiveQuerySuite extends HiveComparisonTest with SQLTestUtils with BeforeAnd sql("select * from src join t1 on src.key = t1.a") sql("DROP TABLE t1") assert(sql("list jars"). - filter(_.getString(0).contains(TestHive.HIVE_HCATALOG_CORE_JAR)).count() > 0) + filter(_.getString(0).contains(HiveTestUtils.getHiveHcatalogCoreJar.getName)).count() > 0) assert(sql("list jar"). - filter(_.getString(0).contains(TestHive.HIVE_HCATALOG_CORE_JAR)).count() > 0) + filter(_.getString(0).contains(HiveTestUtils.getHiveHcatalogCoreJar.getName)).count() > 0) val testJar2 = TestHive.getHiveFile("TestUDTF.jar").getCanonicalPath sql(s"ADD JAR $testJar2") assert(sql(s"list jar $testJar").count() == 1) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index 29de55f..56c16c8 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -37,7 +37,7 @@ import org.apache.spark.sql.execution.command.LoadDataCommand import org.apache.spark.sql.execution.datasources.{HadoopFsRelation, LogicalRelation} import org.apache.spark.sql.functions._ import org.apache.spark.sql.hive.{HiveExternalCatalog, HiveUtils} -import org.apache.spark.sql.hive.test.TestHiveSingleton +import org.apache.spark.sql.hive.test.{HiveTestUtils, TestHiveSingleton} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SQLTestUtils import org.apache.spark.sql.types._ @@ -1105,7 +1105,7 @@ class SQLQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleton { override def run() { // To make sure this test works, this jar should not be loaded in another place. sql( - s"ADD JAR ${hiveContext.getHiveContribJar().getCanonicalPath}") + s"ADD JAR ${HiveTestUtils.getHiveContribJar.getCanonicalPath}") try { sql( """ diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/HiveTestUtils.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/HiveTestUtils.scala new file mode 100644 index 0000000..7631efe --- /dev/null +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/HiveTestUtils.scala @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.hive.test + +import java.io.File + +import org.apache.hadoop.hive.contrib.udaf.example.UDAFExampleMax +import org.apache.hive.hcatalog.data.JsonSerDe + +object HiveTestUtils { + + val getHiveContribJar: File = + new File(classOf[UDAFExampleMax].getProtectionDomain.getCodeSource.getLocation.getPath) + + val getHiveHcatalogCoreJar: File = + new File(classOf[JsonSerDe].getProtectionDomain.getCodeSource.getLocation.getPath) +} diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala similarity index 98% rename from sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala rename to sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala index 6976435..5e77cac 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala @@ -123,11 +123,6 @@ class TestHiveContext( @transient override val sparkSession: TestHiveSparkSession) extends SQLContext(sparkSession) { - val HIVE_CONTRIB_JAR: String = - if (HiveUtils.isHive23) "hive-contrib-2.3.5.jar" else "hive-contrib-0.13.1.jar" - val HIVE_HCATALOG_CORE_JAR: String = - if (HiveUtils.isHive23) "hive-hcatalog-core-2.3.5.jar" else "hive-hcatalog-core-0.13.1.jar" - /** * If loadTestTables is false, no test tables are loaded. Note that this flag can only be true * when running in the JVM, i.e. it needs to be false when calling from Python. @@ -154,14 +149,6 @@ class TestHiveContext( sparkSession.getHiveFile(path) } - def getHiveContribJar(): File = { - sparkSession.getHiveFile(HIVE_CONTRIB_JAR) - } - - def getHiveHcatalogCoreJar(): File = { - sparkSession.getHiveFile(HIVE_HCATALOG_CORE_JAR) - } - def loadTestTable(name: String): Unit = { sparkSession.loadTestTable(name) } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org