[ https://issues.apache.org/jira/browse/SPARK-27080?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Wenchen Fan resolved SPARK-27080. --------------------------------- Resolution: Fixed Fix Version/s: 2.3.4, 2.4.1, 3.0.0 Issue resolved by pull request 24001 [https://github.com/apache/spark/pull/24001] > Read parquet file with merging metastore schema should compare schema field > in uniform case. > -------------------------------------------------------------------------------------------- > > Key: SPARK-27080 > URL: https://issues.apache.org/jira/browse/SPARK-27080 > Project: Spark > Issue Type: Bug > Components: SQL > Affects Versions: 2.3.2, 2.3.3, 2.4.0 > Reporter: BoMeng > Priority: Major > Fix For: 3.0.0, 2.4.1, 2.3.4 > > > In our production environment, after we upgraded Spark from version 2.1 to 2.3, > a job failed with the exception below: > --- ERROR stack trace --- > Exception occur when running Job, > org.apache.spark.SparkException: Detected conflicting schemas when merging > the schema obtained from the Hive > Metastore with the one inferred from the file format. Metastore schema: > { > "type" : "struct", > "fields" : [ > ...... > } > Inferred schema: > { > "type" : "struct", > "fields" : [ > ...... 
> } > at > org.apache.spark.sql.hive.HiveMetastoreCatalog$.mergeWithMetastoreSchema(HiveMetastoreCatalog.scala:295) > at > org.apache.spark.sql.hive.HiveMetastoreCatalog$$anonfun$11.apply(HiveMetastoreCatalog.scala:243) > at > org.apache.spark.sql.hive.HiveMetastoreCatalog$$anonfun$11.apply(HiveMetastoreCatalog.scala:243) > at scala.Option.map(Option.scala:146) > at > org.apache.spark.sql.hive.HiveMetastoreCatalog.org$apache$spark$sql$hive$HiveMetastoreCatalog$$inferIfNeeded(HiveMetastoreCatalog.scala:243) > at > org.apache.spark.sql.hive.HiveMetastoreCatalog$$anonfun$4$$anonfun$5.apply(HiveMetastoreCatalog.scala:167) > at > org.apache.spark.sql.hive.HiveMetastoreCatalog$$anonfun$4$$anonfun$5.apply(HiveMetastoreCatalog.scala:156) > at scala.Option.getOrElse(Option.scala:121) > at > org.apache.spark.sql.hive.HiveMetastoreCatalog$$anonfun$4.apply(HiveMetastoreCatalog.scala:156) > at > org.apache.spark.sql.hive.HiveMetastoreCatalog$$anonfun$4.apply(HiveMetastoreCatalog.scala:148) > at > org.apache.spark.sql.hive.HiveMetastoreCatalog.withTableCreationLock(HiveMetastoreCatalog.scala:54) > at > org.apache.spark.sql.hive.HiveMetastoreCatalog.convertToLogicalRelation(HiveMetastoreCatalog.scala:148) > at > org.apache.spark.sql.hive.RelationConversions.org$apache$spark$sql$hive$RelationConversions$$convert(HiveStrategies.scala:195) > at > org.apache.spark.sql.hive.RelationConversions$$anonfun$apply$4.applyOrElse(HiveStrategies.scala:226) > at > org.apache.spark.sql.hive.RelationConversions$$anonfun$apply$4.applyOrElse(HiveStrategies.scala:215) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformUp$1.apply(TreeNode.scala:289) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformUp$1.apply(TreeNode.scala:289) > at > org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:70) > at > org.apache.spark.sql.catalyst.trees.TreeNode.transformUp(TreeNode.scala:288) > at > 
org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$3.apply(TreeNode.scala:286) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$3.apply(TreeNode.scala:286) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:306) > at > org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:187) > at > org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:304) > at > org.apache.spark.sql.catalyst.trees.TreeNode.transformUp(TreeNode.scala:286) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$3.apply(TreeNode.scala:286) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$3.apply(TreeNode.scala:286) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:306) > at > org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:187) > at > org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:304) > at > org.apache.spark.sql.catalyst.trees.TreeNode.transformUp(TreeNode.scala:286) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$3.apply(TreeNode.scala:286) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$3.apply(TreeNode.scala:286) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:306) > at > org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:187) > at > org.apache.spark.sql.catalyst.trees.TreeNode.mapChildren(TreeNode.scala:304) > at > org.apache.spark.sql.catalyst.trees.TreeNode.transformUp(TreeNode.scala:286) > at > org.apache.spark.sql.hive.RelationConversions.apply(HiveStrategies.scala:215) > at > org.apache.spark.sql.hive.RelationConversions.apply(HiveStrategies.scala:180) > > The following case can trigger the exception, so we think it's a bug in > spark2.3 > {code:java} > // Parquet schema is subset of metaStore schema and has uppercase field name > assertResult( > StructType(Seq( > StructField("UPPERCase", DoubleType, 
nullable = true), > StructField("lowerCase", BinaryType, nullable = true)))) { > HiveMetastoreCatalog.mergeWithMetastoreSchema( > StructType(Seq( > StructField("UPPERCase", DoubleType, nullable = true), > StructField("lowerCase", BinaryType, nullable = true))), > StructType(Seq( > StructField("lowerCase", BinaryType, nullable = true)))) > } > {code} -- This message was sent by Atlassian JIRA (v7.6.3#76005) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscribe@spark.apache.org For additional commands, e-mail: issues-help@spark.apache.org