[ https://issues.apache.org/jira/browse/SPARK-27421?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Yuming Wang updated SPARK-27421: -------------------------------- Parent: SPARK-33537 Issue Type: Sub-task (was: Bug) > RuntimeException when querying a view on a partitioned parquet table > -------------------------------------------------------------------- > > Key: SPARK-27421 > URL: https://issues.apache.org/jira/browse/SPARK-27421 > Project: Spark > Issue Type: Sub-task > Components: SQL > Affects Versions: 2.4.0, 2.4.1 > Environment: Using Scala version 2.11.12 (Java HotSpot(TM) 64-Bit > Server VM, Java 1.8.0_141) > Reporter: Eric Maynard > Assignee: Yuming Wang > Priority: Minor > Fix For: 2.4.8, 3.0.2, 3.1.0 > > > When running a simple query, I get the following stacktrace: > {code} > java.lang.RuntimeException: Caught Hive MetaException attempting to get > partition metadata by filter from Hive. You can set the Spark configuration > setting spark.sql.hive.manageFilesourcePartitions to false to work around > this problem, however this will result in degraded performance. Please report > a bug: https://issues.apache.org/jira/browse/SPARK > at > org.apache.spark.sql.hive.client.Shim_v0_13.getPartitionsByFilter(HiveShim.scala:772) > at > org.apache.spark.sql.hive.client.HiveClientImpl$$anonfun$getPartitionsByFilter$1.apply(HiveClientImpl.scala:686) > at > org.apache.spark.sql.hive.client.HiveClientImpl$$anonfun$getPartitionsByFilter$1.apply(HiveClientImpl.scala:684) > at > org.apache.spark.sql.hive.client.HiveClientImpl$$anonfun$withHiveState$1.apply(HiveClientImpl.scala:283) > at > org.apache.spark.sql.hive.client.HiveClientImpl.liftedTree1$1(HiveClientImpl.scala:221) > at > org.apache.spark.sql.hive.client.HiveClientImpl.retryLocked(HiveClientImpl.scala:220) > at > org.apache.spark.sql.hive.client.HiveClientImpl.withHiveState(HiveClientImpl.scala:266) > at > org.apache.spark.sql.hive.client.HiveClientImpl.getPartitionsByFilter(HiveClientImpl.scala:684) > at > org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$listPartitionsByFilter$1.apply(HiveExternalCatalog.scala:1268) > at > org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$listPartitionsByFilter$1.apply(HiveExternalCatalog.scala:1261) > at > org.apache.spark.sql.hive.HiveExternalCatalog.withClient(HiveExternalCatalog.scala:99) > at > org.apache.spark.sql.hive.HiveExternalCatalog.listPartitionsByFilter(HiveExternalCatalog.scala:1261) > at > org.apache.spark.sql.catalyst.catalog.ExternalCatalogWithListener.listPartitionsByFilter(ExternalCatalogWithListener.scala:262) > at > org.apache.spark.sql.catalyst.catalog.SessionCatalog.listPartitionsByFilter(SessionCatalog.scala:957) > at > org.apache.spark.sql.execution.datasources.CatalogFileIndex.filterPartitions(CatalogFileIndex.scala:73) > at > org.apache.spark.sql.execution.datasources.PruneFileSourcePartitions$$anonfun$apply$1.applyOrElse(PruneFileSourcePartitions.scala:63) > at > org.apache.spark.sql.execution.datasources.PruneFileSourcePartitions$$anonfun$apply$1.applyOrElse(PruneFileSourcePartitions.scala:27) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$2.apply(TreeNode.scala:256) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$2.apply(TreeNode.scala:256) > at > org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:70) > at > org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:255) > at > org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDown(LogicalPlan.scala:29) > at > org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$class.transformDown(AnalysisHelper.scala:149) > at > org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDown(LogicalPlan.scala:29) > at > org.apache.spark.sql.execution.datasources.PruneFileSourcePartitions$.apply(PruneFileSourcePartitions.scala:27) > at > org.apache.spark.sql.execution.datasources.PruneFileSourcePartitions$.apply(PruneFileSourcePartitions.scala:26) > at > org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1$$anonfun$apply$1.apply(RuleExecutor.scala:87) > at > org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1$$anonfun$apply$1.apply(RuleExecutor.scala:84) > at > scala.collection.IndexedSeqOptimized$class.foldl(IndexedSeqOptimized.scala:57) > at > scala.collection.IndexedSeqOptimized$class.foldLeft(IndexedSeqOptimized.scala:66) > at scala.collection.mutable.WrappedArray.foldLeft(WrappedArray.scala:35) > at > org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1.apply(RuleExecutor.scala:84) > at > org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1.apply(RuleExecutor.scala:76) > at scala.collection.immutable.List.foreach(List.scala:392) > at > org.apache.spark.sql.catalyst.rules.RuleExecutor.execute(RuleExecutor.scala:76) > at > org.apache.spark.sql.execution.QueryExecution.optimizedPlan$lzycompute(QueryExecution.scala:66) > at > org.apache.spark.sql.execution.QueryExecution.optimizedPlan(QueryExecution.scala:66) > at > org.apache.spark.sql.execution.QueryExecution.sparkPlan$lzycompute(QueryExecution.scala:72) > at > org.apache.spark.sql.execution.QueryExecution.sparkPlan(QueryExecution.scala:68) > at > org.apache.spark.sql.execution.QueryExecution.executedPlan$lzycompute(QueryExecution.scala:77) > at > org.apache.spark.sql.execution.QueryExecution.executedPlan(QueryExecution.scala:77) > at > org.apache.spark.sql.execution.QueryExecution$$anonfun$simpleString$1.apply(QueryExecution.scala:188) > at > org.apache.spark.sql.execution.QueryExecution$$anonfun$simpleString$1.apply(QueryExecution.scala:188) > at > org.apache.spark.sql.execution.QueryExecution.stringOrError(QueryExecution.scala:99) > at > org.apache.spark.sql.execution.QueryExecution.simpleString(QueryExecution.scala:188) > at > org.apache.spark.sql.execution.command.ExplainCommand.run(commands.scala:171) > at > org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:70) > at > org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:68) > at > org.apache.spark.sql.execution.command.ExecutedCommandExec.executeCollect(commands.scala:79) > at org.apache.spark.sql.Dataset.explain(Dataset.scala:484) > at org.apache.spark.sql.Dataset.explain(Dataset.scala:497) > ... 49 elided > Caused by: java.lang.reflect.InvocationTargetException: > org.apache.hadoop.hive.metastore.api.MetaException: Filtering is supported > only on partition keys of type string > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:498) > at > org.apache.spark.sql.hive.client.Shim_v0_13.getPartitionsByFilter(HiveShim.scala:759) > ... 99 more > Caused by: org.apache.hadoop.hive.metastore.api.MetaException: Filtering is > supported only on partition keys of type string > at > org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$get_partitions_by_filter_result$get_partitions_by_filter_resultStandardScheme.read(ThriftHiveMetastore.java) > at > org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$get_partitions_by_filter_result$get_partitions_by_filter_resultStandardScheme.read(ThriftHiveMetastore.java) > at > org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$get_partitions_by_filter_result.read(ThriftHiveMetastore.java) > at org.apache.thrift.TServiceClient.receiveBase(TServiceClient.java:86) > at > org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$Client.recv_get_partitions_by_filter(ThriftHiveMetastore.java:2514) > at > org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$Client.get_partitions_by_filter(ThriftHiveMetastore.java:2498) > at > org.apache.hadoop.hive.metastore.HiveMetaStoreClient.listPartitionsByFilter(HiveMetaStoreClient.java:1453) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:498) > at > org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.invoke(RetryingMetaStoreClient.java:154) > at com.sun.proxy.$Proxy33.listPartitionsByFilter(Unknown Source) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:498) > at > org.apache.hadoop.hive.metastore.HiveMetaStoreClient$SynchronizedHandler.invoke(HiveMetaStoreClient.java:2562) > at com.sun.proxy.$Proxy33.listPartitionsByFilter(Unknown Source) > at > org.apache.hadoop.hive.ql.metadata.Hive.getPartitionsByFilter(Hive.java:2820) > ... 104 more > {code} > I can replicate the issue with the following: > {code:java} > spark.sql("CREATE table test (name STRING) partitioned by (id int) STORED AS > PARQUET") > spark.sql("CREATE VIEW test_view as select cast(id as string) as id, name > from test") > spark.sql("SELECT * FROM test_view WHERE id = '0'").explain > {code} > If we use a table which isn't stored as parquet, there appears to be no issue: > {code:java} > spark.sql("CREATE table test (name STRING) partitioned by (id int)") > spark.sql("CREATE VIEW test_view as select cast(id as string) as id, name > from test") > spark.sql("SELECT * FROM test_view WHERE id = '0'").explain > {code} -- This message was sent by Atlassian Jira (v8.3.4#803005) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org