[ 
https://issues.apache.org/jira/browse/IMPALA-7119?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Quanlong Huang updated IMPALA-7119:
-----------------------------------
    Fix Version/s: Impala 2.13.0

> HBase tests failing with RetriesExhausted and "RuntimeException: couldn't retrieve HBase table"
> -----------------------------------------------------------------------------------------------
>
>                 Key: IMPALA-7119
>                 URL: https://issues.apache.org/jira/browse/IMPALA-7119
>             Project: IMPALA
>          Issue Type: Bug
>    Affects Versions: Impala 2.13.0
>            Reporter: Tim Armstrong
>            Assignee: Joe McDonnell
>            Priority: Major
>              Labels: broken-build, flaky
>             Fix For: Impala 2.13.0, Impala 3.1.0
>
>
> 64820211a2d30238093f1c4cd03bc268e3a01638
> {noformat}
>     
> metadata.test_compute_stats.TestHbaseComputeStats.test_hbase_compute_stats_incremental[exec_option:
>  {'batch_size': 0, 'num_nodes': 0, 'disable_codegen_rows_threshold': 5000, 
> 'disable_codegen': False, 'abort_on_error': 1, 'debug_action': None, 
> 'exec_single_node_rows_threshold': 0} | table_format: hbase/none]
>     
> metadata.test_compute_stats.TestHbaseComputeStats.test_hbase_compute_stats[exec_option:
>  {'batch_size': 0, 'num_nodes': 0, 'disable_codegen_rows_threshold': 5000, 
> 'disable_codegen': False, 'abort_on_error': 1, 'debug_action': None, 
> 'exec_single_node_rows_threshold': 0} | table_format: hbase/none]
>     query_test.test_mt_dop.TestMtDop.test_mt_dop[mt_dop: 1 | exec_option: 
> {'batch_size': 0, 'num_nodes': 0, 'disable_codegen_rows_threshold': 0, 
> 'disable_codegen': False, 'abort_on_error': 1, 'debug_action': None, 
> 'exec_single_node_rows_threshold': 0} | table_format: hbase/none]
>     query_test.test_mt_dop.TestMtDop.test_compute_stats[mt_dop: 1 | 
> exec_option: {'batch_size': 0, 'num_nodes': 0, 
> 'disable_codegen_rows_threshold': 0, 'disable_codegen': False, 
> 'abort_on_error': 1, 'debug_action': None, 'exec_single_node_rows_threshold': 
> 0} | table_format: hbase/none]
>     
> query_test.test_hbase_queries.TestHBaseQueries.test_hbase_scan_node[exec_option:
>  {'batch_size': 0, 'num_nodes': 0, 'disable_codegen_rows_threshold': 0, 
> 'disable_codegen': False, 'abort_on_error': 1, 'debug_action': None, 
> 'exec_single_node_rows_threshold': 0} | table_format: hbase/none]
>     query_test.test_queries.TestHdfsQueries.test_file_partitions[exec_option: 
> {'batch_size': 0, 'num_nodes': 0, 'disable_codegen_rows_threshold': 0, 
> 'disable_codegen': False, 'abort_on_error': 1, 'debug_action': None, 
> 'exec_single_node_rows_threshold': 0} | table_format: hbase/none]
>     query_test.test_mt_dop.TestMtDop.test_mt_dop[mt_dop: 0 | exec_option: 
> {'batch_size': 0, 'num_nodes': 0, 'disable_codegen_rows_threshold': 0, 
> 'disable_codegen': False, 'abort_on_error': 1, 'debug_action': None, 
> 'exec_single_node_rows_threshold': 0} | table_format: hbase/none]
>     query_test.test_observability.TestObservability.test_scan_summary
>     query_test.test_mt_dop.TestMtDop.test_compute_stats[mt_dop: 0 | 
> exec_option: {'batch_size': 0, 'num_nodes': 0, 
> 'disable_codegen_rows_threshold': 0, 'disable_codegen': False, 
> 'abort_on_error': 1, 'debug_action': None, 'exec_single_node_rows_threshold': 
> 0} | table_format: hbase/none]
>     failure.test_failpoints.TestFailpoints.test_failpoints[table_format: 
> hbase/none | exec_option: {'batch_size': 0, 'num_nodes': 0, 
> 'disable_codegen_rows_threshold': 0, 'disable_codegen': False, 
> 'abort_on_error': 1, 'debug_action': None, 'exec_single_node_rows_threshold': 
> 0} | mt_dop: 4 | location: GETNEXT_SCANNER | action: FAIL | query: select 1 
> from alltypessmall order by id limit 100]
>     failure.test_failpoints.TestFailpoints.test_failpoints[table_format: 
> hbase/none | exec_option: {'batch_size': 0, 'num_nodes': 0, 
> 'disable_codegen_rows_threshold': 0, 'disable_codegen': False, 
> 'abort_on_error': 1, 'debug_action': None, 'exec_single_node_rows_threshold': 
> 0} | mt_dop: 0 | location: OPEN | action: CANCEL | query: select c from 
> (select id c from alltypessmall order by id limit 10) v where c = 1]
>     failure.test_failpoints.TestFailpoints.test_failpoints[table_format: 
> hbase/none | exec_option: {'batch_size': 0, 'num_nodes': 0, 
> 'disable_codegen_rows_threshold': 0, 'disable_codegen': False, 
> 'abort_on_error': 1, 'debug_action': None, 'exec_single_node_rows_threshold': 
> 0} | mt_dop: 0 | location: CLOSE | action: MEM_LIMIT_EXCEEDED | query: select 
> count(*) from alltypessmall]
>     failure.test_failpoints.TestFailpoints.test_failpoints[table_format: 
> hbase/none | exec_option: {'batch_size': 0, 'num_nodes': 0, 
> 'disable_codegen_rows_threshold': 0, 'disable_codegen': False, 
> 'abort_on_error': 1, 'debug_action': None, 'exec_single_node_rows_threshold': 
> 0} | mt_dop: 4 | location: PREPARE | action: MEM_LIMIT_EXCEEDED | query: 
> select count(int_col) from alltypessmall group by id]
>     failure.test_failpoints.TestFailpoints.test_failpoints[table_format: 
> hbase/none | exec_option: {'batch_size': 0, 'num_nodes': 0, 
> 'disable_codegen_rows_threshold': 0, 'disable_codegen': False, 
> 'abort_on_error': 1, 'debug_action': None, 'exec_single_node_rows_threshold': 
> 0} | mt_dop: 4 | location: OPEN | action: MEM_LIMIT_EXCEEDED | query: select 
> * from alltypessmall union all select * from alltypessmall]
>     failure.test_failpoints.TestFailpoints.test_failpoints[table_format: 
> hbase/none | exec_option: {'batch_size': 0, 'num_nodes': 0, 
> 'disable_codegen_rows_threshold': 0, 'disable_codegen': False, 
> 'abort_on_error': 1, 'debug_action': None, 'exec_single_node_rows_threshold': 
> 0} | mt_dop: 4 | location: CLOSE | action: MEM_LIMIT_EXCEEDED | query: select 
> row_number() over (partition by int_col order by id) from alltypessmall]
>     failure.test_failpoints.TestFailpoints.test_failpoints[table_format: 
> hbase/none | exec_option: {'batch_size': 0, 'num_nodes': 0, 
> 'disable_codegen_rows_threshold': 0, 'disable_codegen': False, 
> 'abort_on_error': 1, 'debug_action': None, 'exec_single_node_rows_threshold': 
> 0} | mt_dop: 4 | location: CLOSE | action: MEM_LIMIT_EXCEEDED | query: select 
> 1 from alltypessmall order by id]
>     failure.test_failpoints.TestFailpoints.test_failpoints[table_format: 
> hbase/none | exec_option: {'batch_size': 0, 'num_nodes': 0, 
> 'disable_codegen_rows_threshold': 0, 'disable_codegen': False, 
> 'abort_on_error': 1, 'debug_action': None, 'exec_single_node_rows_threshold': 
> 0} | mt_dop: 4 | location: CLOSE | action: MEM_LIMIT_EXCEEDED | query: select 
> * from alltypes]
>     verifiers.test_verify_metrics.TestValidateMetrics.test_metrics_are_zero
>     
> org.apache.impala.planner.PlannerTest.org.apache.impala.planner.PlannerTest
>     
> org.apache.impala.planner.S3PlannerTest.org.apache.impala.planner.S3PlannerTest
>     failure.test_failpoints.TestFailpoints.test_failpoints[table_format: 
> hbase/none | exec_option: {'batch_size': 0, 'num_nodes': 0, 
> 'disable_codegen_rows_threshold': 0, 'disable_codegen': False, 
> 'abort_on_error': 1, 'debug_action': None, 'exec_single_node_rows_threshold': 
> 0} | mt_dop: 4 | location: GETNEXT | action: FAIL | query: select 1 from 
> alltypessmall a join alltypessmall b on a.id != b.id]
>     failure.test_failpoints.TestFailpoints.test_failpoints[table_format: 
> hbase/none | exec_option: {'batch_size': 0, 'num_nodes': 0, 
> 'disable_codegen_rows_threshold': 0, 'disable_codegen': False, 
> 'abort_on_error': 1, 'debug_action': None, 'exec_single_node_rows_threshold': 
> 0} | mt_dop: 4 | location: PREPARE_SCANNER | action: MEM_LIMIT_EXCEEDED | 
> query: select 1 from alltypessmall a join alltypessmall b on a.id = b.id]
> {noformat}
> {noformat}
> 21:22:44 Running org.apache.impala.planner.S3PlannerTest
> 21:22:44 Tests run: 1, Failures: 0, Errors: 1, Skipped: 0, Time elapsed: 
> 450.328 sec <<< FAILURE! - in org.apache.impala.planner.S3PlannerTest
> 21:22:44 org.apache.impala.planner.S3PlannerTest  Time elapsed: 450.328 sec  
> <<< ERROR!
> 21:22:44      at 
> org.apache.impala.datagenerator.HBaseTestDataRegionAssignment.<init>(HBaseTestDataRegionAssignment.java:68)
> 21:22:44      at 
> org.apache.impala.planner.PlannerTestBase.setUp(PlannerTestBase.java:120)
> 21:22:44      at 
> org.apache.maven.surefire.junit4.JUnit4Provider.executeTestSet(JUnit4Provider.java:153)
> 21:22:44      at 
> org.apache.impala.datagenerator.HBaseTestDataRegionAssignment.<init>(HBaseTestDataRegionAssignment.java:68)
> 21:22:44      at 
> org.apache.impala.planner.PlannerTestBase.setUp(PlannerTestBase.java:120)
> 21:22:44      at 
> org.apache.maven.surefire.junit4.JUnit4Provider.executeTestSet(JUnit4Provider.java:153)
> 21:22:44      at 
> org.apache.impala.datagenerator.HBaseTestDataRegionAssignment.<init>(HBaseTestDataRegionAssignment.java:68)
> 21:22:44      at 
> org.apache.impala.planner.PlannerTestBase.setUp(PlannerTestBase.java:120)
> 21:22:44      at 
> org.apache.maven.surefire.junit4.JUnit4Provider.executeTestSet(JUnit4Provider.java:153)
> 21:22:44 Running org.apache.impala.planner.PlannerTest
> 21:22:44 Tests run: 1, Failures: 0, Errors: 1, Skipped: 0, Time elapsed: 
> 450.602 sec <<< FAILURE! - in org.apache.impala.planner.PlannerTest
> 21:22:44 org.apache.impala.planner.PlannerTest  Time elapsed: 450.602 sec  
> <<< ERROR!
> 21:22:44      at 
> org.apache.impala.datagenerator.HBaseTestDataRegionAssignment.<init>(HBaseTestDataRegionAssignment.java:68)
> 21:22:44      at 
> org.apache.impala.planner.PlannerTestBase.setUp(PlannerTestBase.java:120)
> 21:22:44      at 
> org.apache.maven.surefire.junit4.JUnit4Provider.executeTestSet(JUnit4Provider.java:153)
> 21:22:44      at 
> org.apache.impala.datagenerator.HBaseTestDataRegionAssignment.<init>(HBaseTestDataRegionAssignment.java:68)
> 21:22:44      at 
> org.apache.impala.planner.PlannerTestBase.setUp(PlannerTestBase.java:120)
> 21:22:44      at 
> org.apache.maven.surefire.junit4.JUnit4Provider.executeTestSet(JUnit4Provider.java:153)
> 21:22:44      at 
> org.apache.impala.datagenerator.HBaseTestDataRegionAssignment.<init>(HBaseTestDataRegionAssignment.java:68)
> 21:22:44      at 
> org.apache.impala.planner.PlannerTestBase.setUp(PlannerTestBase.java:120)
> 21:22:44      at 
> org.apache.maven.surefire.junit4.JUnit4Provider.executeTestSet(JUnit4Provider.java:153)
> {noformat}
> {noformat}
> 22:53:05 =================================== FAILURES ===================================
> 22:53:05  TestFailpoints.test_failpoints[table_format: hbase/none | 
> exec_option: {'batch_size': 0, 'num_nodes': 0, 
> 'disable_codegen_rows_threshold': 0, 'disable_codegen': False, 
> 'abort_on_error': 1, 'debug_action': None, 'exec_single_node_rows_threshold': 
> 0} | mt_dop: 4 | location: GETNEXT_SCANNER | action: FAIL | query: select 1 
> from alltypessmall order by id limit 100] 
> 22:53:05 failure/test_failpoints.py:102: in test_failpoints
> 22:53:05     raise e
> 22:53:05 E   ImpalaBeeswaxException: ImpalaBeeswaxException:
> 22:53:05 E    INNER EXCEPTION: <class 'beeswaxd.ttypes.BeeswaxException'>
> 22:53:05 E    MESSAGE: RuntimeException: couldn't retrieve HBase table 
> (functional_hbase.alltypessmall) info:
> 22:53:05 E   Connection refused
> 22:53:05 E   CAUSED BY: ConnectException: Connection refused
> 22:53:05  TestFailpoints.test_failpoints[table_format: hbase/none | 
> exec_option: {'batch_size': 0, 'num_nodes': 0, 
> 'disable_codegen_rows_threshold': 0, 'disable_codegen': False, 
> 'abort_on_error': 1, 'debug_action': None, 'exec_single_node_rows_threshold': 
> 0} | mt_dop: 0 | location: OPEN | action: CANCEL | query: select c from 
> (select id c from alltypessmall order by id limit 10) v where c = 1] 
> 22:53:05 failure/test_failpoints.py:102: in test_failpoints
> 22:53:05     raise e
> 22:53:05 E   ImpalaBeeswaxException: ImpalaBeeswaxException:
> 22:53:05 E    INNER EXCEPTION: <class 'beeswaxd.ttypes.BeeswaxException'>
> 22:53:05 E    MESSAGE: RuntimeException: couldn't retrieve HBase table 
> (functional_hbase.alltypessmall) info:
> 22:53:05 E   Connection refused
> 22:53:05 E   CAUSED BY: ConnectException: Connection refused
> {noformat}
> {noformat}
> 23:21:02  
> TestHbaseComputeStats.test_hbase_compute_stats_incremental[exec_option: 
> {'batch_size': 0, 'num_nodes': 0, 'disable_codegen_rows_threshold': 5000, 
> 'disable_codegen': False, 'abort_on_error': 1, 'debug_action': None, 
> 'exec_single_node_rows_threshold': 0} | table_format: hbase/none] 
> 23:21:02 [gw3] linux2 -- Python 2.7.5 
> /data/jenkins/workspace/impala-asf-2.x-core/repos/Impala/bin/../infra/python/env/bin/python
> 23:21:02 metadata/test_compute_stats.py:147: in 
> test_hbase_compute_stats_incremental
> 23:21:02     unique_database)
> 23:21:02 common/impala_test_suite.py:405: in run_test_case
> 23:21:02     result = self.__execute_query(target_impalad_client, query, 
> user=user)
> 23:21:02 common/impala_test_suite.py:620: in __execute_query
> 23:21:02     return impalad_client.execute(query, user=user)
> 23:21:02 common/impala_connection.py:160: in execute
> 23:21:02     return self.__beeswax_client.execute(sql_stmt, user=user)
> 23:21:02 beeswax/impala_beeswax.py:173: in execute
> 23:21:02     handle = self.__execute_query(query_string.strip(), user=user)
> 23:21:02 beeswax/impala_beeswax.py:341: in __execute_query
> 23:21:02     self.wait_for_completion(handle)
> 23:21:02 beeswax/impala_beeswax.py:361: in wait_for_completion
> 23:21:02     raise ImpalaBeeswaxException("Query aborted:" + error_log, None)
> 23:21:02 E   ImpalaBeeswaxException: ImpalaBeeswaxException:
> 23:21:02 E    Query aborted:RuntimeException: couldn't retrieve HBase table 
> (functional_hbase.alltypessmall) info:
> 23:21:02 E   This server is in the failed servers list: 
> localhost/127.0.0.1:16202
> 23:21:02 E   CAUSED BY: FailedServerException: This server is in the failed 
> servers list: localhost/127.0.0.1:16202
> {noformat}



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-all-unsubscr...@impala.apache.org
For additional commands, e-mail: issues-all-h...@impala.apache.org

Reply via email to