Repository: spark Updated Branches: refs/heads/master 8a7db8a60 -> dd724c84c
[SPARK-18989][SQL] DESC TABLE should not fail with format class not found ## What changes were proposed in this pull request? When we describe a table, we only want to see the information of this table, not read it, so it's OK even if the format class is not present on the classpath. ## How was this patch tested? new regression test Author: Wenchen Fan <[email protected]> Closes #16388 from cloud-fan/hive. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/dd724c84 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/dd724c84 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/dd724c84 Branch: refs/heads/master Commit: dd724c84c830d30385712d72b65e2a76a2ade700 Parents: 8a7db8a Author: Wenchen Fan <[email protected]> Authored: Mon Dec 26 11:27:56 2016 -0800 Committer: gatorsmile <[email protected]> Committed: Mon Dec 26 11:27:56 2016 -0800 ---------------------------------------------------------------------- .../spark/sql/hive/client/HiveClientImpl.scala | 11 ++++- .../spark/sql/hive/HiveSparkSubmitSuite.scala | 46 ++++++++++++++++++++ 2 files changed, 55 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/dd724c84/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala ---------------------------------------------------------------------- diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala index bacae8a..5c0e2f6 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala @@ -408,8 +408,15 @@ private[hive] class HiveClientImpl( lastAccessTime = h.getLastAccessTime.toLong * 1000, storage = CatalogStorageFormat( locationUri = 
shim.getDataLocation(h), - inputFormat = Option(h.getInputFormatClass).map(_.getName), - outputFormat = Option(h.getOutputFormatClass).map(_.getName), + // To avoid ClassNotFound exception, we try our best to not get the format class, but get + // the class name directly. However, for non-native tables, there is no interface to get + // the format class name, so we may still throw ClassNotFound in this case. + inputFormat = Option(h.getTTable.getSd.getInputFormat).orElse { + Option(h.getStorageHandler).map(_.getInputFormatClass.getName) + }, + outputFormat = Option(h.getTTable.getSd.getOutputFormat).orElse { + Option(h.getStorageHandler).map(_.getOutputFormatClass.getName) + }, serde = Option(h.getSerializationLib), compressed = h.getTTable.getSd.isCompressed, properties = Option(h.getTTable.getSd.getSerdeInfo.getParameters) http://git-wip-us.apache.org/repos/asf/spark/blob/dd724c84/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala ---------------------------------------------------------------------- diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala index a670560..9aa9ebf 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala @@ -311,6 +311,29 @@ class HiveSparkSubmitSuite runSparkSubmit(args) } + test("SPARK-18989: DESC TABLE should not fail with format class not found") { + val unusedJar = TestUtils.createJarWithClasses(Seq.empty) + + val argsForCreateTable = Seq( + "--class", SPARK_18989_CREATE_TABLE.getClass.getName.stripSuffix("$"), + "--name", "SPARK-18947", + "--master", "local-cluster[2,1,1024]", + "--conf", "spark.ui.enabled=false", + "--conf", "spark.master.rest.enabled=false", + "--jars", TestHive.getHiveFile("hive-contrib-0.13.1.jar").getCanonicalPath, + unusedJar.toString) + 
runSparkSubmit(argsForCreateTable) + + val argsForShowTables = Seq( + "--class", SPARK_18989_DESC_TABLE.getClass.getName.stripSuffix("$"), + "--name", "SPARK-18947", + "--master", "local-cluster[2,1,1024]", + "--conf", "spark.ui.enabled=false", + "--conf", "spark.master.rest.enabled=false", + unusedJar.toString) + runSparkSubmit(argsForShowTables) + } + // NOTE: This is an expensive operation in terms of time (10 seconds+). Use sparingly. // This is copied from org.apache.spark.deploy.SparkSubmitSuite private def runSparkSubmit(args: Seq[String]): Unit = { @@ -853,3 +876,26 @@ object SPARK_18360 { } } } + +object SPARK_18989_CREATE_TABLE { + def main(args: Array[String]): Unit = { + val spark = SparkSession.builder().enableHiveSupport().getOrCreate() + spark.sql( + """ + |CREATE TABLE IF NOT EXISTS base64_tbl(val string) STORED AS + |INPUTFORMAT 'org.apache.hadoop.hive.contrib.fileformat.base64.Base64TextInputFormat' + |OUTPUTFORMAT 'org.apache.hadoop.hive.contrib.fileformat.base64.Base64TextOutputFormat' + """.stripMargin) + } +} + +object SPARK_18989_DESC_TABLE { + def main(args: Array[String]): Unit = { + val spark = SparkSession.builder().enableHiveSupport().getOrCreate() + try { + spark.sql("DESC base64_tbl") + } finally { + spark.sql("DROP TABLE IF EXISTS base64_tbl") + } + } +} --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
