cdmikechen opened a new issue #774: Matching question of the version in Spark and Hive2 URL: https://github.com/apache/incubator-hudi/issues/774 When I used Spark 2.3.3 and Hive 2.3.3 to test hoodie, I found that if the Spark session is built with `enableHiveSupport()`, Spark matches the Hive version in `org.apache.spark.sql.hive.client.IsolatedClientLoader`: ``` def hiveVersion(version: String): HiveVersion = version match { case "12" | "0.12" | "0.12.0" => hive.v12 case "13" | "0.13" | "0.13.0" | "0.13.1" => hive.v13 case "14" | "0.14" | "0.14.0" => hive.v14 case "1.0" | "1.0.0" => hive.v1_0 case "1.1" | "1.1.0" => hive.v1_1 case "1.2" | "1.2.0" | "1.2.1" | "1.2.2" => hive.v1_2 case "2.0" | "2.0.0" | "2.0.1" => hive.v2_0 case "2.1" | "2.1.0" | "2.1.1" => hive.v2_1 } ``` If I use a higher Hive version and run a SQL statement such as `sparksession.sql("use database")`, it reports this error: ``` scala.MatchError: 2.3.3 (of class java.lang.String) at org.apache.spark.sql.hive.client.IsolatedClientLoader$.hiveVersion(IsolatedClientLoader.scala:89) ~[spark-hive_2.11-2.3.3.jar:2.3.3] at org.apache.spark.sql.hive.HiveUtils$.newClientForMetadata(HiveUtils.scala:300) ~[spark-hive_2.11-2.3.3.jar:2.3.3] at org.apache.spark.sql.hive.HiveUtils$.newClientForMetadata(HiveUtils.scala:287) ~[spark-hive_2.11-2.3.3.jar:2.3.3] at org.apache.spark.sql.hive.HiveExternalCatalog.client$lzycompute(HiveExternalCatalog.scala:66) ~[spark-hive_2.11-2.3.3.jar:2.3.3] at org.apache.spark.sql.hive.HiveExternalCatalog.client(HiveExternalCatalog.scala:65) ~[spark-hive_2.11-2.3.3.jar:2.3.3] at org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$databaseExists$1.apply$mcZ$sp(HiveExternalCatalog.scala:195) ~[spark-hive_2.11-2.3.3.jar:2.3.3] at org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$databaseExists$1.apply(HiveExternalCatalog.scala:195) ~[spark-hive_2.11-2.3.3.jar:2.3.3] at org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$databaseExists$1.apply(HiveExternalCatalog.scala:195) 
~[spark-hive_2.11-2.3.3.jar:2.3.3] at org.apache.spark.sql.hive.HiveExternalCatalog.withClient(HiveExternalCatalog.scala:97) ~[spark-hive_2.11-2.3.3.jar:2.3.3] at org.apache.spark.sql.hive.HiveExternalCatalog.databaseExists(HiveExternalCatalog.scala:194) ~[spark-hive_2.11-2.3.3.jar:2.3.3] at org.apache.spark.sql.internal.SharedState.externalCatalog$lzycompute(SharedState.scala:114) ~[spark-sql_2.11-2.3.3.jar:2.3.3] at org.apache.spark.sql.internal.SharedState.externalCatalog(SharedState.scala:102) ~[spark-sql_2.11-2.3.3.jar:2.3.3] at org.apache.spark.sql.hive.HiveSessionStateBuilder.externalCatalog(HiveSessionStateBuilder.scala:39) ~[spark-hive_2.11-2.3.3.jar:2.3.3] at org.apache.spark.sql.hive.HiveSessionStateBuilder.catalog$lzycompute(HiveSessionStateBuilder.scala:54) ~[spark-hive_2.11-2.3.3.jar:2.3.3] at org.apache.spark.sql.hive.HiveSessionStateBuilder.catalog(HiveSessionStateBuilder.scala:52) ~[spark-hive_2.11-2.3.3.jar:2.3.3] at org.apache.spark.sql.hive.HiveSessionStateBuilder$$anon$1.<init>(HiveSessionStateBuilder.scala:69) ~[spark-hive_2.11-2.3.3.jar:2.3.3] at org.apache.spark.sql.hive.HiveSessionStateBuilder.analyzer(HiveSessionStateBuilder.scala:69) ~[spark-hive_2.11-2.3.3.jar:2.3.3] at org.apache.spark.sql.internal.BaseSessionStateBuilder$$anonfun$build$2.apply(BaseSessionStateBuilder.scala:293) ~[spark-sql_2.11-2.3.3.jar:2.3.3] at org.apache.spark.sql.internal.BaseSessionStateBuilder$$anonfun$build$2.apply(BaseSessionStateBuilder.scala:293) ~[spark-sql_2.11-2.3.3.jar:2.3.3] at org.apache.spark.sql.internal.SessionState.analyzer$lzycompute(SessionState.scala:79) ~[spark-sql_2.11-2.3.3.jar:2.3.3] at org.apache.spark.sql.internal.SessionState.analyzer(SessionState.scala:79) ~[spark-sql_2.11-2.3.3.jar:2.3.3] at org.apache.spark.sql.execution.QueryExecution.analyzed$lzycompute(QueryExecution.scala:57) ~[spark-sql_2.11-2.3.3.jar:2.3.3] at org.apache.spark.sql.execution.QueryExecution.analyzed(QueryExecution.scala:55) ~[spark-sql_2.11-2.3.3.jar:2.3.3] at 
org.apache.spark.sql.execution.QueryExecution.assertAnalyzed(QueryExecution.scala:47) ~[spark-sql_2.11-2.3.3.jar:2.3.3] at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:74) ~[spark-sql_2.11-2.3.3.jar:2.3.3] at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:642) ~[spark-sql_2.11-2.3.3.jar:2.3.3] ... ``` The QuickStart says hoodie can work with `spark-2.[1-3].x` and `hive-2.3.3`, but the version match shown above has no case for Hive 2.3.x, so that combination fails. I think we may need to find an officially recommended Hive 2 version that Spark can match, such as `hive-2.1.1`.
---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: users@infra.apache.org With regards, Apache Git Services
