Rajesh Balamohan created HIVE-24790: ---------------------------------------
Summary: Batch column stats updates to HMS Key: HIVE-24790 URL: https://issues.apache.org/jira/browse/HIVE-24790 Project: Hive Issue Type: Improvement Components: HiveServer2 Reporter: Rajesh Balamohan When a large number of partitions are inserted/updated, it would be good to batch column statistics updates to HMS. Currently, HS2 ends up throwing a read timeout exception when updating HMS. https://github.com/apache/hive/blob/master/ql/src/java/org/apache/hadoop/hive/ql/stats/ColStatsProcessor.java#L180 {noformat} Caused by: java.net.SocketTimeoutException: Read timed out at java.net.SocketInputStream.socketRead0(Native Method) ~[?:1.8.0_252] at java.net.SocketInputStream.socketRead(SocketInputStream.java:116) ~[?:1.8.0_252] at java.net.SocketInputStream.read(SocketInputStream.java:171) ~[?:1.8.0_252] at java.net.SocketInputStream.read(SocketInputStream.java:141) ~[?:1.8.0_252] at java.io.BufferedInputStream.fill(BufferedInputStream.java:246) ~[?:1.8.0_252] at java.io.BufferedInputStream.read1(BufferedInputStream.java:286) ~[?:1.8.0_252] at java.io.BufferedInputStream.read(BufferedInputStream.java:345) ~[?:1.8.0_252] at org.apache.thrift.transport.TIOStreamTransport.read(TIOStreamTransport.java:127) ~[hive-exec-3.1] at org.apache.thrift.transport.TTransport.readAll(TTransport.java:86) ~[hive-exec-3.1] at org.apache.thrift.transport.TSaslTransport.readLength(TSaslTransport.java:374) ~[hive-exec-3.1] at org.apache.thrift.transport.TSaslTransport.readFrame(TSaslTransport.java:451) ~[hive-exec-3.1] at org.apache.thrift.transport.TSaslTransport.read(TSaslTransport.java:433) ~[hive-exec-3.1] at org.apache.thrift.transport.TSaslClientTransport.read(TSaslClientTransport.java:37) ~[hive-exec-3.1] at org.apache.thrift.transport.TTransport.readAll(TTransport.java:86) ~[hive-exec-3.1] at org.apache.hadoop.hive.metastore.security.TFilterTransport.readAll(TFilterTransport.java:62) ~[hive-exec-3.1] at org.apache.thrift.protocol.TBinaryProtocol.readAll(TBinaryProtocol.java:429) 
~[hive-exec-3.1] at org.apache.thrift.protocol.TBinaryProtocol.readI32(TBinaryProtocol.java:318) ~[hive-exec-3.1] at org.apache.thrift.protocol.TBinaryProtocol.readMessageBegin(TBinaryProtocol.java:219) ~[hive-exec-3.1] at org.apache.thrift.TServiceClient.receiveBase(TServiceClient.java:77) ~[hive-exec-3.1] at org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$Client.recv_set_aggr_stats_for(ThriftHiveMetastore.java:4561) ~[hive-exec-3.1] at org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$Client.set_aggr_stats_for(ThriftHiveMetastore.java:4548) ~[hive-exec-3.1] at org.apache.hadoop.hive.metastore.HiveMetaStoreClient.setPartitionColumnStatistics(HiveMetaStoreClient.java:2496) ~[hive-exec-3.1] at org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient.setPartitionColumnStatistics(SessionHiveMetaStoreClient.java:515) ~[hive-exec-3.1] at sun.reflect.GeneratedMethodAccessor194.invoke(Unknown Source) ~[?:?] at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) ~[?:1.8.0_252] at java.lang.reflect.Method.invoke(Method.java:498) ~[?:1.8.0_252] at org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.invoke(RetryingMetaStoreClient.java:212) ~[hive-exec-3.1] at com.sun.proxy.$Proxy60.setPartitionColumnStatistics(Unknown Source) ~[?:?] at sun.reflect.GeneratedMethodAccessor194.invoke(Unknown Source) ~[?:?] at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) ~[?:1.8.0_252] at java.lang.reflect.Method.invoke(Method.java:498) ~[?:1.8.0_252] at org.apache.hadoop.hive.metastore.HiveMetaStoreClient$SynchronizedHandler.invoke(HiveMetaStoreClient.java:3431) ~[hive-exec-3.1] at com.sun.proxy.$Proxy60.setPartitionColumnStatistics(Unknown Source) ~[?:?] 
at org.apache.hadoop.hive.ql.metadata.Hive.setPartitionColumnStatistics(Hive.java:5213) ~[hive-exec-3.1] at org.apache.hadoop.hive.ql.stats.ColStatsProcessor.persistColumnStats(ColStatsProcessor.java:192) ~[hive-exec-3.1] at org.apache.hadoop.hive.ql.stats.ColStatsProcessor.process(ColStatsProcessor.java:87) ~[hive-exec-3.1] at org.apache.hadoop.hive.ql.exec.StatsTask.execute(StatsTask.java:107) ~[hive-exec-3.1] at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:213) ~[hive-exec-3.1] at org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:105) ~[hive-exec-3.1] at org.apache.hadoop.hive.ql.Executor.launchTask(Executor.java:357) ~[hive-exec-3.1] {noformat} -- This message was sent by Atlassian Jira (v8.3.4#803005)