[ https://issues.apache.org/jira/browse/AMBARI-13145?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14876630#comment-14876630 ]
Hudson commented on AMBARI-13145:
---------------------------------

FAILURE: Integrated in Ambari-trunk-Commit #3467 (See [https://builds.apache.org/job/Ambari-trunk-Commit/3467/])
AMBARI-13145 - RU - Skipping failed task caused remaining pending tasks to be ABORTED (jonathanhurley) (jhurley: http://git-wip-us.apache.org/repos/asf?p=ambari.git&a=commit&h=9dd623abb78e094bbf6ab5fcd4763cf2efa96c4b)
* ambari-server/src/test/java/org/apache/ambari/server/actionmanager/TestActionScheduler.java
* ambari-server/src/main/java/org/apache/ambari/server/actionmanager/ActionScheduler.java

> RU - Skipping failed task caused remaining pending tasks to be ABORTED
> ----------------------------------------------------------------------
>
> Key: AMBARI-13145
> URL: https://issues.apache.org/jira/browse/AMBARI-13145
> Project: Ambari
> Issue Type: Bug
> Components: ambari-server
> Affects Versions: 2.1.0
> Reporter: Jonathan Hurley
> Assignee: Jonathan Hurley
> Priority: Blocker
> Fix For: 2.1.2
>
> Attachments: AMBARI-13145.patch
>
>
> Aborting a failed task during an upgrade causes the entire upgrade request to become ABORTED.
> {code:title=Failed & Skipped command} > { > "href": > "http://172.22.65.131:8080/api/v1/clusters/cl1/upgrades/64/upgrade_groups/11/upgrade_items/414?fields=UpgradeItem/*,tasks/Tasks/*", > "UpgradeItem": { > "cluster_host_info": > "{\"hs_host\":[\"9\"],\"namenode_host\":[\"17\",\"14\"],\"hive_metastore_host\":[\"19\",\"8\",\"11\"],\"hbase_rs_hosts\":[\"0-19\"],\"zookeeper_hosts\":[\"3\",\"14\",\"11\"],\"metrics_monitor_hosts\":[\"0-19\"],\"rm_host\":[\"16\",\"14\"],\"slave_hosts\":[\"0-19\"],\"app_timeline_server_hosts\":[\"6\"],\"ambari_server_host\":[\"os-s11-3-ectjrs-c102dalsechanr-21.novalocal\"],\"metrics_collector_hosts\":[\"3\"],\"all_ping_ports\":[\"8670:0-19\"],\"all_racks\":[\"/default-rack:0-19\"],\"knox_gateway_hosts\":[\"7\"],\"flume_hosts\":[\"0-19\"],\"falcon_server_hosts\":[\"15\"],\"hbase_master_hosts\":[\"19\",\"8\",\"14\"],\"journalnode_hosts\":[\"14,15\",\"11\"],\"nm_hosts\":[\"0-19\"],\"hive_server_host\":[\"19\",\"8\",\"11\"],\"zkfc_hosts\":[\"17\",\"14\"],\"all_hosts\":[\"os-s11-3-ectjrs-c102dalsechanr-8.novalocal\",\"os-s11-3-ectjrs-c102dalsechanr-2.novalocal\",\"os-s11-3-ectjrs-c102dalsechanr-5.novalocal\",\"os-s11-3-ectjrs-c102dalsechanr-1.novalocal\",\"os-s11-3-ectjrs-c102dalsechanr-3.novalocal\",\"os-s11-3-ectjrs-c102dalsechanr-16.novalocal\",\"os-s11-3-ectjrs-c102dalsechanr-19.novalocal\",\"os-s11-3-ectjrs-c102dalsechanr-17.novalocal\",\"os-s11-3-ectjrs-c102dalsechanr-21.novalocal\",\"os-s11-3-ectjrs-c102dalsechanr-14.novalocal\",\"os-s11-3-ectjrs-c102dalsechanr-18.novalocal\",\"os-s11-3-ectjrs-c102dalsechanr-11.novalocal\",\"os-s11-3-ectjrs-c102dalsechanr-7.novalocal\",\"os-s11-3-ectjrs-c102dalsechanr-9.novalocal\",\"os-s11-3-ectjrs-c102dalsechanr-10.novalocal\",\"os-s11-3-ectjrs-c102dalsechanr-12.novalocal\",\"os-s11-3-ectjrs-c102dalsechanr-15.novalocal\",\"os-s11-3-ectjrs-c102dalsechanr-13.novalocal\",\"os-s11-3-ectjrs-c102dalsechanr-6.novalocal\",\"os-s11-3-ectjrs-c102dalsechanr-20.novalocal\"],\"all_ipv4_ips\":[\"172.22.65.185:4\",
\"172.22.65.222:12\",\"172.22.65.95:7\",\"172.22.65.225:2\",\"172.22.65.10:11\",\"172.22.65.46:17\",\"172.22.65.109:10\",\"172.22.65.17:14\",\"172.22.65.181:3\",\"172.22.65.33:15\",\"172.22.65.218:18\",\"172.22.65.21:13\",\"172.22.65.119:19\",\"172.22.65.58:9\",\"172.22.65.182:1\",\"172.22.65.100:6\",\"172.22.65.66:16\",\"172.22.65.68:5\",\"172.22.65.239:0\",\"172.22.65.131:8\"],\"oozie_server\":[\"14,15\",\"11\"],\"webhcat_server_host\":[\"19\",\"8\",\"11\"]}", > "cluster_name": "cl1", > "command_params": > "{\"original_stack\":\"HDP-2.2\",\"upgrade_direction\":\"upgrade\",\"target_stack\":\"HDP-2.3\",\"forceRefreshConfigTagsBeforeExecution\":\"*\",\"version\":\"2.3.2.0-2844\"}", > "context": "Service Check Hive", > "end_time": -1, > "group_id": 11, > "host_params": > "{\"ambari_db_rca_driver\":\"org.postgresql.Driver\",\"ambari_db_rca_password\":\"bigdatacustom\",\"ambari_db_rca_url\":\"jdbc:postgresql://172.22.65.184:5432/ambaricustom\",\"ambari_db_rca_username\":\"ambaricustomuser\",\"current_version\":\"2.2.6.0-2800\",\"db_driver_filename\":\"mysql-connector-java.jar\",\"db_name\":\"ambaricustom\",\"host_sys_prepped\":\"false\",\"java_home\":\"/usr/jdk64/jdk1.7.0_67\",\"java_version\":\"7\",\"jdk_location\":\"http://os-s11-3-ectjrs-c102dalsechanr-21.novalocal:8080/resources/\",\"mysql_jdbc_url\":\"http://os-s11-3-ectjrs-c102dalsechanr-21.novalocal:8080/resources//mysql-connector-java.jar\",\"oracle_jdbc_url\":\"http://os-s11-3-ectjrs-c102dalsechanr-21.novalocal:8080/resources//ojdbc6.jar\",\"stack_name\":\"HDP\",\"stack_version\":\"2.3\"}", > "log_info": null, > "progress_percent": 100, > "request_id": 64, > "skippable": true, > "stage_id": 414, > "start_time": 1442469303768, > "status": "COMPLETED", > "text": "Service Check Hive" > }, > "tasks": [ > { > "href": > "http://172.22.65.131:8080/api/v1/clusters/cl1/upgrades/64/upgrade_groups/11/upgrade_items/414/tasks/3459", > "Tasks": { > "attempt_cnt": 5, > "cluster_name": "cl1", > "command": "SERVICE_CHECK", > 
"command_detail": "SERVICE_CHECK HIVE", > "end_time": -1, > "error_log": "/var/lib/ambari-agent/data/errors-3459.txt", > "exit_code": 999, > "host_name": "os-s11-3-ectjrs-c102dalsechanr-8.novalocal", > "id": 3459, > "output_log": "/var/lib/ambari-agent/data/output-3459.txt", > "request_id": 64, > "role": "HIVE_SERVICE_CHECK", > "stage_id": 414, > "start_time": 1442469303768, > "status": "FAILED", > "stderr": "Python script has been killed due to timeout after waiting > 300 secs", > "stdout": "2015-09-17 05:55:08,128 - hadoop-client is currently at > version 2.3.2.0-2844\n2015-09-17 05:55:08,168 - checked_call['conf-select > set-conf-dir --package hadoop --stack-version 2.3.2.0-2844 --conf-version 0'] > {'logoutput': False, 'sudo': True, 'quiet': False}\n2015-09-17 05:55:08,209 - > checked_call returned (0, '/usr/hdp/2.3.2.0-2844/hadoop/conf -> > /etc/hadoop/2.3.2.0-2844/0')\n2015-09-17 05:55:08,247 - hadoop-client is > currently at version 2.3.2.0-2844\nTest connectivity to hive server\nWaiting > for the Hive server to start...\n2015-09-17 05:55:08,346 - > Execute['/usr/bin/kinit -kt /etc/security/keytabs/smokeuser.headless.keytab > ambari...@example.com; '] {'user': 'ambari-qa'}\n2015-09-17 05:55:08,415 - > Execute['! 
beeline -u > 'jdbc:hive2://os-s11-3-ectjrs-c102dalsechanr-20.novalocal:10010/;transportMode=binary;principal=hive/_h...@example.com' > -e '' 2>&1| awk '{print}'|grep -i -e 'Connection refused' -e 'Invalid URL''] > {'path': ['/bin/', '/usr/bin/', '/usr/lib/hive/bin/', '/usr/sbin/'], 'user': > 'ambari-qa', 'timeout': 30}\nSuccessfully connected to > os-s11-3-ectjrs-c102dalsechanr-20.novalocal on port 10010\nSuccessfully > connected to Hive at os-s11-3-ectjrs-c102dalsechanr-8.novalocal on port 10010 > after 8 seconds\n2015-09-17 05:55:16,613 - > File['/var/lib/ambari-agent/tmp/hcatSmoke.sh'] {'content': > StaticFile('hcatSmoke.sh'), 'mode': 0755}\n2015-09-17 05:55:16,614 - > Execute['/usr/bin/kinit -kt /etc/security/keytabs/smokeuser.headless.keytab > ambari...@example.com; env JAVA_HOME=/usr/jdk64/jdk1.7.0_67 > /var/lib/ambari-agent/tmp/hcatSmoke.sh hcatsmokeid16acef41_date551715 prepare > true'] {'logoutput': True, 'path': ['/usr/sbin', '/usr/local/bin', '/bin', > '/usr/bin', > '/usr/sbin:/sbin:/usr/lib/ambari-server/*:/sbin:/usr/sbin:/usr/local/sbin:/usr/local/bin:/bin:/usr/bin:/usr/X11R6/bin:/var/lib/ambari-agent:/usr/hdp/2.3.2.0-2844/hadoop/bin:/usr/hdp/2.3.2.0-2844/hive/bin'], > 'tries': 3, 'user': 'ambari-qa', 'try_sleep': 5}\nWARNING: Use \"yarn jar\" > to launch YARN applications.\nivysettings.xml file not found in HIVE_HOME or > HIVE_CONF_DIR,file:/grid/0/hdp/2.3.2.0-2844/hadoop/lib/hadoop-lzo-0.6.0.2.3.2.0-2844-sources.jar!/ivysettings.xml > will be used", > "structured_out": {} > } > } > ] > } > {code} > {code:title=ABORTED upgrade request} > { > "href": > "http://172.22.65.131:8080/api/v1/clusters/cl1/upgrades/64?fields=upgrade_groups/UpgradeGroup/*", > "Upgrade": { > "cluster_name": "cl1", > "request_id": 64 > }, > "upgrade_groups": [ > { > "href": > "http://172.22.65.131:8080/api/v1/clusters/cl1/upgrades/64/upgrade_groups/1", > "UpgradeGroup": { > "completed_task_count": 5, > "group_id": 1, > "in_progress_task_count": 0, > "name": "PRE_CLUSTER", > 
"progress_percent": 100, > "request_id": 64, > "status": "COMPLETED", > "title": "Prepare Upgrade", > "total_task_count": 5 > } > }, > { > "href": > "http://172.22.65.131:8080/api/v1/clusters/cl1/upgrades/64/upgrade_groups/2", > "UpgradeGroup": { > "completed_task_count": 4, > "group_id": 2, > "in_progress_task_count": 0, > "name": "ZOOKEEPER", > "progress_percent": 100, > "request_id": 64, > "status": "COMPLETED", > "title": "ZooKeeper", > "total_task_count": 4 > } > }, > { > "href": > "http://172.22.65.131:8080/api/v1/clusters/cl1/upgrades/64/upgrade_groups/3", > "UpgradeGroup": { > "completed_task_count": 36, > "group_id": 3, > "in_progress_task_count": 0, > "name": "CORE_MASTER", > "progress_percent": 100, > "request_id": 64, > "status": "COMPLETED", > "title": "Core Masters", > "total_task_count": 36 > } > }, > { > "href": > "http://172.22.65.131:8080/api/v1/clusters/cl1/upgrades/64/upgrade_groups/4", > "UpgradeGroup": { > "completed_task_count": 15, > "group_id": 4, > "in_progress_task_count": 0, > "name": "SERVICE_CHECK", > "progress_percent": 100, > "request_id": 64, > "status": "COMPLETED", > "title": "All Service Checks", > "total_task_count": 15 > } > }, > { > "href": > "http://172.22.65.131:8080/api/v1/clusters/cl1/upgrades/64/upgrade_groups/5", > "UpgradeGroup": { > "completed_task_count": 61, > "group_id": 5, > "in_progress_task_count": 0, > "name": "CORE_SLAVES", > "progress_percent": 100, > "request_id": 64, > "status": "COMPLETED", > "title": "Core Slaves", > "total_task_count": 61 > } > }, > { > "href": > "http://172.22.65.131:8080/api/v1/clusters/cl1/upgrades/64/upgrade_groups/6", > "UpgradeGroup": { > "completed_task_count": 15, > "group_id": 6, > "in_progress_task_count": 0, > "name": "SERVICE_CHECK", > "progress_percent": 100, > "request_id": 64, > "status": "COMPLETED", > "title": "All Service Checks", > "total_task_count": 15 > } > }, > { > "href": > "http://172.22.65.131:8080/api/v1/clusters/cl1/upgrades/64/upgrade_groups/7", > 
"UpgradeGroup": { > "completed_task_count": 21, > "group_id": 7, > "in_progress_task_count": 0, > "name": "HIVE", > "progress_percent": 100, > "request_id": 64, > "status": "COMPLETED", > "title": "Hive", > "total_task_count": 21 > } > }, > { > "href": > "http://172.22.65.131:8080/api/v1/clusters/cl1/upgrades/64/upgrade_groups/8", > "UpgradeGroup": { > "completed_task_count": 28, > "group_id": 8, > "in_progress_task_count": 0, > "name": "OOZIE", > "progress_percent": 100, > "request_id": 64, > "status": "COMPLETED", > "title": "Oozie", > "total_task_count": 28 > } > }, > { > "href": > "http://172.22.65.131:8080/api/v1/clusters/cl1/upgrades/64/upgrade_groups/9", > "UpgradeGroup": { > "completed_task_count": 22, > "group_id": 9, > "in_progress_task_count": 0, > "name": "FALCON", > "progress_percent": 100, > "request_id": 64, > "status": "COMPLETED", > "title": "Falcon", > "total_task_count": 22 > } > }, > { > "href": > "http://172.22.65.131:8080/api/v1/clusters/cl1/upgrades/64/upgrade_groups/10", > "UpgradeGroup": { > "completed_task_count": 201, > "group_id": 10, > "in_progress_task_count": 0, > "name": "CLIENTS", > "progress_percent": 100, > "request_id": 64, > "status": "COMPLETED", > "title": "Client Components", > "total_task_count": 201 > } > }, > { > "href": > "http://172.22.65.131:8080/api/v1/clusters/cl1/upgrades/64/upgrade_groups/11", > "UpgradeGroup": { > "completed_task_count": 22, > "group_id": 11, > "in_progress_task_count": 0, > "name": "SERVICE_CHECK", > "progress_percent": 100, > "request_id": 64, > "status": "ABORTED", > "title": "All Service Checks", > "total_task_count": 22 > } > }, > { > "href": > "http://172.22.65.131:8080/api/v1/clusters/cl1/upgrades/64/upgrade_groups/12", > "UpgradeGroup": { > "completed_task_count": 10, > "group_id": 12, > "in_progress_task_count": 0, > "name": "KNOX", > "progress_percent": 100, > "request_id": 64, > "status": "ABORTED", > "title": "Knox", > "total_task_count": 10 > } > }, > { > "href": > 
"http://172.22.65.131:8080/api/v1/clusters/cl1/upgrades/64/upgrade_groups/13", > "UpgradeGroup": { > "completed_task_count": 40, > "group_id": 13, > "in_progress_task_count": 0, > "name": "SLIDER", > "progress_percent": 100, > "request_id": 64, > "status": "ABORTED", > "title": "Slider", > "total_task_count": 40 > } > }, > { > "href": > "http://172.22.65.131:8080/api/v1/clusters/cl1/upgrades/64/upgrade_groups/14", > "UpgradeGroup": { > "completed_task_count": 42, > "group_id": 14, > "in_progress_task_count": 0, > "name": "FLUME", > "progress_percent": 100, > "request_id": 64, > "status": "ABORTED", > "title": "Flume", > "total_task_count": 42 > } > }, > { > "href": > "http://172.22.65.131:8080/api/v1/clusters/cl1/upgrades/64/upgrade_groups/15", > "UpgradeGroup": { > "completed_task_count": 40, > "group_id": 15, > "in_progress_task_count": 0, > "name": "ALL_HOST_OPS", > "progress_percent": 100, > "request_id": 64, > "status": "ABORTED", > "title": "Finalize Hosts", > "total_task_count": 40 > } > }, > { > "href": > "http://172.22.65.131:8080/api/v1/clusters/cl1/upgrades/64/upgrade_groups/16", > "UpgradeGroup": { > "completed_task_count": 6, > "group_id": 16, > "in_progress_task_count": 0, > "name": "POST_CLUSTER", > "progress_percent": 100, > "request_id": 64, > "status": "ABORTED", > "title": "Finalize Upgrade", > "total_task_count": 6 > } > } > ] > } > {code} -- This message was sent by Atlassian JIRA (v6.3.4#6332)