[ https://issues.apache.org/jira/browse/SPARK-49827?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17886648#comment-17886648 ]
Ignite TC Bot commented on SPARK-49827: --------------------------------------- User 'Madhukar525722' has created a pull request for this issue: https://github.com/apache/spark/pull/48337 > Adding batches with retry mechanism for fetching all partitions from metastore > ------------------------------------------------------------------------------ > > Key: SPARK-49827 > URL: https://issues.apache.org/jira/browse/SPARK-49827 > Project: Spark > Issue Type: Improvement > Components: Spark Core > Affects Versions: 3.4.1, 3.5.3 > Reporter: Madhukar > Priority: Major > > The request is inspired by HIVE-27505. > Fetching all partitions in a single call is problematic for tables with large metadata (> 2 GB / > 2000 columns): > it crashes the HMS. > Error logs: > {code:java} > 24/09/27 13:49:36 ERROR Hive: org.apache.thrift.transport.TTransportException > at > org.apache.thrift.transport.TIOStreamTransport.read(TIOStreamTransport.java:132) > at org.apache.thrift.transport.TTransport.readAll(TTransport.java:86) > at > org.apache.thrift.transport.TSaslTransport.readLength(TSaslTransport.java:374) > at > org.apache.thrift.transport.TSaslTransport.readFrame(TSaslTransport.java:451) > at > org.apache.thrift.transport.TSaslTransport.read(TSaslTransport.java:433) > at > org.apache.thrift.transport.TSaslClientTransport.read(TSaslClientTransport.java:38) > at org.apache.thrift.transport.TTransport.readAll(TTransport.java:86) > at > org.apache.hadoop.hive.thrift.TFilterTransport.readAll(TFilterTransport.java:62) > at > org.apache.thrift.protocol.TBinaryProtocol.readAll(TBinaryProtocol.java:425) > at > org.apache.thrift.protocol.TBinaryProtocol.readI32(TBinaryProtocol.java:321) > at > org.apache.thrift.protocol.TBinaryProtocol.readMessageBegin(TBinaryProtocol.java:225) > at org.apache.thrift.TServiceClient.receiveBase(TServiceClient.java:77) > at > org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$Client.recv_get_partitions(ThriftHiveMetastore.java:2381) > at > 
org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$Client.get_partitions(ThriftHiveMetastore.java:2366) > at > org.apache.hadoop.hive.metastore.HiveMetaStoreClient.listPartitions(HiveMetaStoreClient.java:1175) > at > java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:77) > at > java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.base/java.lang.reflect.Method.invoke(Method.java:569) > at > org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.invoke(RetryingMetaStoreClient.java:173) > at jdk.proxy2/jdk.proxy2.$Proxy59.listPartitions(Unknown Source) > at > java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:77) > at > java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.base/java.lang.reflect.Method.invoke(Method.java:569) > at > org.apache.hadoop.hive.metastore.HiveMetaStoreClient$SynchronizedHandler.invoke(HiveMetaStoreClient.java:2327) > at jdk.proxy2/jdk.proxy2.$Proxy59.listPartitions(Unknown Source) > at > org.apache.hadoop.hive.ql.metadata.Hive.getAllPartitionsOf(Hive.java:2528) > at > java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:77) > at > java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.base/java.lang.reflect.Method.invoke(Method.java:569) > at > org.apache.spark.sql.hive.client.Shim_v0_13.prunePartitionsFastFallback(HiveShim.scala:1164) > at > org.apache.spark.sql.hive.client.Shim_v0_13.getPartitionsByFilter(HiveShim.scala:1102) > at > 
org.apache.spark.sql.hive.client.HiveClientImpl.$anonfun$getPartitionsByFilter$1(HiveClientImpl.scala:823) > at > org.apache.spark.sql.hive.client.HiveClientImpl.$anonfun$withHiveState$1(HiveClientImpl.scala:313) > at > org.apache.spark.sql.hive.client.HiveClientImpl.liftedTree1$1(HiveClientImpl.scala:244) > at > org.apache.spark.sql.hive.client.HiveClientImpl.retryLocked(HiveClientImpl.scala:243) > {code} > -- This message was sent by Atlassian Jira (v8.20.10#820010) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org