[ https://issues.apache.org/jira/browse/HIVE-8766?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14201281#comment-14201281 ]
Hari Sankar Sivarama Subramaniyan commented on HIVE-8766:
----------------------------------------------------------

[~hagleitn] The null scenarios are handled in the if conditions. I am using the fully qualified name and not importing NucleusException because it is a one-time usage within the entire RetryingHMSHandler code. If this is not preferable, I can modify the patch; a rough sketch of the check appears after the quoted stack trace below.
Thanks,
Hari

> Hive RetryHMSHandler should be retrying the metastore operation in case of
> NucleusException
> -------------------------------------------------------------------------------------------
>
>                 Key: HIVE-8766
>                 URL: https://issues.apache.org/jira/browse/HIVE-8766
>             Project: Hive
>          Issue Type: Bug
>          Components: Metastore
>            Reporter: Hari Sankar Sivarama Subramaniyan
>            Assignee: Hari Sankar Sivarama Subramaniyan
>             Fix For: 0.14.0
>
>         Attachments: HIVE-8766.1.patch
>
>
> When the Metastore database is heavily loaded or takes a long time to respond, Metastore operations can run into NucleusExceptions, as shown in the stack trace below. In this scenario the Metastore DB is SQL Server, and SQL Server is configured to time out and terminate the connection with a reset after 'x' seconds if it does not return a ResultSet. While this calls for a configuration change on the Metastore DB side, we need to make sure that in such cases the HMS retrying mechanism is not so rigid that it fails such Hive queries outright. The proposed fix is to allow retries when we hit a NucleusException, as shown below:
> {noformat}
> 2014-11-04 06:40:03,208 ERROR bonecp.ConnectionHandle (ConnectionHandle.java:markPossiblyBroken(388)) - Database access problem. Killing off this connection and all remaining connections in the connection pool. SQL State = 08S01
> 2014-11-04 06:40:03,213 ERROR DataNucleus.Transaction (Log4JLogger.java:error(115)) - Operation rollback failed on resource: org.datanucleus.store.rdbms.ConnectionFactoryImpl$EmulatedXAResource@1a35cc16, error code UNKNOWN and transaction: [DataNucleus Transaction, ID=Xid= �, enlisted resources=[org.datanucleus.store.rdbms.ConnectionFactoryImpl$EmulatedXAResource@1a35cc16]]
> 2014-11-04 06:40:03,217 ERROR metastore.RetryingHMSHandler (RetryingHMSHandler.java:invoke(139)) - MetaException(message:org.datanucleus.exceptions.NucleusDataStoreException: Size request failed : SELECT COUNT(*) FROM SKEWED_VALUES THIS WHERE THIS.SD_ID_OID=?
> AND THIS.INTEGER_IDX>=0)
>     at org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.newMetaException(HiveMetaStore.java:5183)
>     at org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.get_table_core(HiveMetaStore.java:1738)
>     at org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.get_table(HiveMetaStore.java:1699)
>     at sun.reflect.GeneratedMethodAccessor15.invoke(Unknown Source)
>     at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>     at java.lang.reflect.Method.invoke(Method.java:606)
>     at org.apache.hadoop.hive.metastore.RetryingHMSHandler.invoke(RetryingHMSHandler.java:101)
>     at com.sun.proxy.$Proxy11.get_table(Unknown Source)
>     at org.apache.hadoop.hive.metastore.HiveMetaStoreClient.getTable(HiveMetaStoreClient.java:1091)
>     at org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient.getTable(SessionHiveMetaStoreClient.java:112)
>     at sun.reflect.GeneratedMethodAccessor16.invoke(Unknown Source)
>     at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>     at java.lang.reflect.Method.invoke(Method.java:606)
>     at org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.invoke(RetryingMetaStoreClient.java:90)
>     at com.sun.proxy.$Proxy12.getTable(Unknown Source)
>     at org.apache.hadoop.hive.ql.metadata.Hive.getTable(Hive.java:1060)
>     at org.apache.hadoop.hive.ql.metadata.Hive.getTable(Hive.java:1015)
>     at org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.getTable(BaseSemanticAnalyzer.java:1316)
>     at org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.getTable(BaseSemanticAnalyzer.java:1309)
>     at org.apache.hadoop.hive.ql.parse.DDLSemanticAnalyzer.addInputsOutputsAlterTable(DDLSemanticAnalyzer.java:1387)
>     at org.apache.hadoop.hive.ql.parse.DDLSemanticAnalyzer.analyzeAlterTableSerde(DDLSemanticAnalyzer.java:1356)
>     at org.apache.hadoop.hive.ql.parse.DDLSemanticAnalyzer.analyzeInternal(DDLSemanticAnalyzer.java:299)
>     at org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.analyze(BaseSemanticAnalyzer.java:221)
>     at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:415)
>     at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:303)
>     at org.apache.hadoop.hive.ql.Driver.compileInternal(Driver.java:1067)
>     at org.apache.hadoop.hive.ql.Driver.compileAndRespond(Driver.java:1061)
>     at org.apache.hive.service.cli.operation.SQLOperation.prepare(SQLOperation.java:100)
>     at org.apache.hive.service.cli.operation.SQLOperation.runInternal(SQLOperation.java:171)
>     at org.apache.hive.service.cli.operation.Operation.run(Operation.java:256)
>     at org.apache.hive.service.cli.session.HiveSessionImpl.executeStatementInternal(HiveSessionImpl.java:376)
>     at org.apache.hive.service.cli.session.HiveSessionImpl.executeStatementAsync(HiveSessionImpl.java:363)
>     at org.apache.hive.service.cli.CLIService.executeStatementAsync(CLIService.java:247)
>     at org.apache.hive.service.cli.thrift.ThriftCLIService.ExecuteStatement(ThriftCLIService.java:396)
>     at org.apache.hive.service.cli.thrift.TCLIService$Processor$ExecuteStatement.getResult(TCLIService.java:1313)
>     at org.apache.hive.service.cli.thrift.TCLIService$Processor$ExecuteStatement.getResult(TCLIService.java:1298)
>     at org.apache.thrift.ProcessFunction.process(ProcessFunction.java:39)
>     at org.apache.thrift.TBaseProcessor.process(TBaseProcessor.java:39)
>     at org.apache.hive.service.auth.TSetIpAddressProcessor.process(TSetIpAddressProcessor.java:56)
>     at org.apache.thrift.server.TThreadPoolServer$WorkerProcess.run(TThreadPoolServer.java:206)
>     at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
>     at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
>     at java.lang.Thread.run(Thread.java:745)
> Caused by: org.datanucleus.exceptions.NucleusDataStoreException: Size request failed : SELECT COUNT(*) FROM SKEWED_VALUES THIS WHERE THIS.SD_ID_OID=? AND THIS.INTEGER_IDX>=0
>     at org.datanucleus.store.rdbms.scostore.ElementContainerStore.getSize(ElementContainerStore.java:666)
>     at org.datanucleus.store.rdbms.scostore.ElementContainerStore.size(ElementContainerStore.java:429)
>     at org.datanucleus.store.types.backed.List.size(List.java:581)
>     at org.apache.hadoop.hive.metastore.ObjectStore.convertToSkewedValues(ObjectStore.java:1190)
>     at org.apache.hadoop.hive.metastore.ObjectStore.convertToStorageDescriptor(ObjectStore.java:1168)
>     at org.apache.hadoop.hive.metastore.ObjectStore.convertToStorageDescriptor(ObjectStore.java:1178)
>     at org.apache.hadoop.hive.metastore.ObjectStore.convertToTable(ObjectStore.java:1035)
>     at org.apache.hadoop.hive.metastore.ObjectStore.getTable(ObjectStore.java:893)
>     at sun.reflect.GeneratedMethodAccessor13.invoke(Unknown Source)
>     at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>     at java.lang.reflect.Method.invoke(Method.java:606)
>     at org.apache.hadoop.hive.metastore.RawStoreProxy.invoke(RawStoreProxy.java:98)
>     at com.sun.proxy.$Proxy10.getTable(Unknown Source)
>     at org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.get_table_core(HiveMetaStore.java:1727)
>     ... 41 more
> Caused by: com.microsoft.sqlserver.jdbc.SQLServerException: SSL peer shut down incorrectly
>     at com.microsoft.sqlserver.jdbc.SQLServerConnection.terminate(SQLServerConnection.java:1352)
>     at com.microsoft.sqlserver.jdbc.SQLServerConnection.terminate(SQLServerConnection.java:1339)
>     at com.microsoft.sqlserver.jdbc.TDSChannel.read(IOBuffer.java:1694)
>     at com.microsoft.sqlserver.jdbc.TDSReader.readPacket(IOBuffer.java:3734)
>     at com.microsoft.sqlserver.jdbc.TDSCommand.startResponse(IOBuffer.java:5062)
>     at com.microsoft.sqlserver.jdbc.SQLServerPreparedStatement.doExecutePreparedStatement(SQLServerPreparedStatement.java:388)
>     at com.microsoft.sqlserver.jdbc.SQLServerPreparedStatement$PrepStmtExecCmd.doExecute(SQLServerPreparedStatement.java:340)
>     at com.microsoft.sqlserver.jdbc.TDSCommand.execute(IOBuffer.java:4615)
>     at com.microsoft.sqlserver.jdbc.SQLServerConnection.executeCommand(SQLServerConnection.java:1400)
>     at com.microsoft.sqlserver.jdbc.SQLServerStatement.executeCommand(SQLServerStatement.java:179)
>     at com.microsoft.sqlserver.jdbc.SQLServerStatement.executeStatement(SQLServerStatement.java:154)
>     at com.microsoft.sqlserver.jdbc.SQLServerPreparedStatement.executeQuery(SQLServerPreparedStatement.java:283)
>     at com.jolbox.bonecp.PreparedStatementHandle.executeQuery(PreparedStatementHandle.java:174)
>     at org.datanucleus.store.rdbms.ParamLoggingPreparedStatement.executeQuery(ParamLoggingPreparedStatement.java:381)
>     at org.datanucleus.store.rdbms.SQLController.executeStatementQuery(SQLController.java:504)
>     at org.datanucleus.store.rdbms.scostore.ElementContainerStore.getSize(ElementContainerStore.java:638)
> {noformat}
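To make the discussion above concrete, here is a minimal, illustrative sketch of the kind of check being described: walk the cause chain of the reflective invocation failure, test for org.datanucleus.exceptions.NucleusException via its fully qualified name (no import), and retry only in that case. This is not the actual HIVE-8766.1.patch; the class and member names (RetrySketch, invokeWithRetry, MAX_RETRIES, RETRY_INTERVAL_MS) are made up for illustration, and it assumes datanucleus-core is on the classpath.

{code:java}
// Illustrative only: a simplified retry loop in the spirit of RetryingHMSHandler.invoke().
// MAX_RETRIES and RETRY_INTERVAL_MS are stand-ins, not the actual Hive configuration properties.
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;

public class RetrySketch {

  private static final int MAX_RETRIES = 3;            // stand-in for the configured retry limit
  private static final long RETRY_INTERVAL_MS = 1000L; // stand-in for the configured retry delay

  public static Object invokeWithRetry(Object handler, Method method, Object[] args)
      throws Throwable {
    int attempt = 0;
    while (true) {
      try {
        return method.invoke(handler, args);
      } catch (InvocationTargetException ite) {
        Throwable cause = ite.getCause();
        // Walk the cause chain and retry only when a DataNucleus failure is found.
        // The fully qualified name is used instead of an import, mirroring the
        // one-time usage mentioned in the comment above.
        boolean nucleusFailure = false;
        for (Throwable t = cause; t != null; t = t.getCause()) {
          if (t instanceof org.datanucleus.exceptions.NucleusException) {
            nucleusFailure = true;
            break;
          }
        }
        if (!nucleusFailure || ++attempt > MAX_RETRIES) {
          // Null-cause scenario handled explicitly: rethrow the wrapper if there is no cause.
          throw cause != null ? cause : ite;
        }
        Thread.sleep(RETRY_INTERVAL_MS);
      }
    }
  }
}
{code}

Using the fully qualified class name for a single occurrence keeps the DataNucleus type out of the import list, which is the trade-off mentioned in the comment above.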