AMBARI-22588. Hive Client restart fails: hadooplzo package not present in IOP repos. (swagle)
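Context for the subject line: the restart fails because the Hive client install unconditionally requests the hadooplzo package, which the IOP repositories do not carry. The small oozie.py/hive.py/hdfs.py/yarn.py hunks that carry the actual fix appear in the diffstat below but are elided from this excerpt; the usual remedy in Ambari stack scripts is to request LZO packages only when LZO is actually configured. A minimal sketch of that guard pattern (all names here are illustrative, not the literal committed code):

# Hypothetical sketch of the guard this commit applies; names are illustrative.
def hive_client_packages(lzo_enabled):
    packages = ["hive", "hive-hcatalog"]
    if lzo_enabled:
        # Request hadooplzo only when the LZO codec is configured, so repos
        # that do not carry the package (e.g. IOP) never attempt the install.
        packages.append("hadooplzo")
    return packages

print(hive_client_packages(lzo_enabled=False))  # ['hive', 'hive-hcatalog']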
Project: http://git-wip-us.apache.org/repos/asf/ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/ab1d01bd
Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/ab1d01bd
Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/ab1d01bd

Branch: refs/heads/branch-2.6
Commit: ab1d01bd1872a20a02c467894b1a294dfe96ba1b
Parents: dc969b4
Author: Siddharth Wagle <swa...@hortonworks.com>
Authored: Mon Dec 4 13:51:54 2017 -0800
Committer: Siddharth Wagle <swa...@hortonworks.com>
Committed: Mon Dec 4 13:52:06 2017 -0800

----------------------------------------------------------------------
 .../alert_hive_interactive_thrift_port.py       |  230 +++
 .../HIVE/package/alerts/alert_hive_metastore.py |  276 ++++
 .../package/alerts/alert_hive_thrift_port.py    |  286 ++++
 .../package/alerts/alert_llap_app_status.py     |  303 ++++
 .../HIVE/package/alerts/alert_webhcat_server.py |  228 +++
 .../package/etc/hive-schema-0.12.0.mysql.sql    |  777 ++++++++++
 .../package/etc/hive-schema-0.12.0.oracle.sql   |  718 +++++++++
 .../package/etc/hive-schema-0.12.0.postgres.sql | 1406 ++++++++++++++++++
 .../services/HIVE/package/files/addMysqlUser.sh |   39 +
 .../services/HIVE/package/files/hcatSmoke.sh    |   41 +
 .../services/HIVE/package/files/hiveSmoke.sh    |   24 +
 .../HIVE/package/files/hiveTezSetup.cmd         |   58 +
 .../services/HIVE/package/files/hiveserver2.sql |   23 +
 .../HIVE/package/files/hiveserver2Smoke.sh      |   32 +
 .../services/HIVE/package/files/pigSmoke.sh     |   18 +
 .../HIVE/package/files/removeMysqlUser.sh       |   33 +
 .../HIVE/package/files/startMetastore.sh        |   25 +
 .../HIVE/package/files/templetonSmoke.sh        |   95 ++
 .../services/HIVE/package/scripts/__init__.py   |   19 +
 .../4.2.5/services/HIVE/package/scripts/hcat.py |   81 +
 .../HIVE/package/scripts/hcat_client.py         |   79 +
 .../HIVE/package/scripts/hcat_service_check.py  |   86 ++
 .../4.2.5/services/HIVE/package/scripts/hive.py |  562 +++++++
 .../HIVE/package/scripts/hive_client.py         |   62 +
 .../HIVE/package/scripts/hive_interactive.py    |  360 +++++
 .../HIVE/package/scripts/hive_metastore.py      |  203 +++
 .../HIVE/package/scripts/hive_server.py         |  161 ++
 .../package/scripts/hive_server_interactive.py  |  626 ++++++++
 .../HIVE/package/scripts/hive_server_upgrade.py |  134 ++
 .../HIVE/package/scripts/hive_service.py        |  185 +++
 .../package/scripts/hive_service_interactive.py |  108 ++
 .../HIVE/package/scripts/mysql_server.py        |   64 +
 .../HIVE/package/scripts/mysql_service.py       |   49 +
 .../HIVE/package/scripts/mysql_users.py         |   70 +
 .../HIVE/package/scripts/mysql_utils.py         |   35 +
 .../services/HIVE/package/scripts/params.py     |   30 +
 .../HIVE/package/scripts/params_linux.py        |  873 +++++++++++
 .../HIVE/package/scripts/params_windows.py      |   74 +
 .../HIVE/package/scripts/service_check.py       |  192 +++
 .../HIVE/package/scripts/setup_ranger_hive.py   |  156 ++
 .../scripts/setup_ranger_hive_interactive.py    |   77 +
 .../HIVE/package/scripts/status_params.py       |  124 ++
 .../services/HIVE/package/scripts/webhcat.py    |  135 ++
 .../HIVE/package/scripts/webhcat_server.py      |   88 ++
 .../HIVE/package/scripts/webhcat_service.py     |   96 ++
 .../package/scripts/webhcat_service_check.py    |  128 ++
 .../hadoop-metrics2-hivemetastore.properties.j2 |   56 +
 .../hadoop-metrics2-hiveserver2.properties.j2   |   55 +
 .../templates/hadoop-metrics2-llapdaemon.j2     |   54 +
 .../hadoop-metrics2-llaptaskscheduler.j2        |   54 +
 .../HIVE/package/templates/hive.conf.j2         |   35 +
 .../package/templates/startHiveserver2.sh.j2    |   24 +
 .../templates/startHiveserver2Interactive.sh.j2 |   24 +
 .../package/templates/templeton_smoke.pig.j2    |   24 +
 .../package/templates/zkmigrator_jaas.conf.j2   |   26 +
 .../services/OOZIE/package/scripts/oozie.py     |   11 +-
 .../4.2/services/HDFS/package/scripts/hdfs.py   |    3 +-
 .../4.2/services/HIVE/package/scripts/hive.py   |    3 +-
 .../4.2/services/OOZIE/package/scripts/oozie.py |   13 +-
 .../4.2/services/YARN/package/scripts/yarn.py   |    5 +-
 60 files changed, 9841 insertions(+), 15 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/ambari/blob/ab1d01bd/ambari-server/src/main/resources/stacks/BigInsights/4.2.5/services/HIVE/package/alerts/alert_hive_interactive_thrift_port.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/BigInsights/4.2.5/services/HIVE/package/alerts/alert_hive_interactive_thrift_port.py b/ambari-server/src/main/resources/stacks/BigInsights/4.2.5/services/HIVE/package/alerts/alert_hive_interactive_thrift_port.py
new file mode 100644
index 0000000..8d48412
--- /dev/null
+++ b/ambari-server/src/main/resources/stacks/BigInsights/4.2.5/services/HIVE/package/alerts/alert_hive_interactive_thrift_port.py
@@ -0,0 +1,230 @@
+#!/usr/bin/env python
+
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+""" + +import os +import socket +import time +import logging +import traceback +from resource_management.libraries.functions import hive_check +from resource_management.libraries.functions import format +from resource_management.libraries.functions import get_kinit_path +from ambari_commons.os_check import OSConst +from ambari_commons.os_family_impl import OsFamilyFuncImpl, OsFamilyImpl + +OK_MESSAGE = "TCP OK - {0:.3f}s response on port {1}" +CRITICAL_MESSAGE = "Connection failed on host {0}:{1} ({2})" + +HIVE_SERVER_INTERACTIVE_THRIFT_PORT_KEY = '{{hive-interactive-site/hive.server2.thrift.port}}' +HIVE_SERVER_INTERACTIVE_THRIFT_HTTP_PORT_KEY = '{{hive-interactive-site/hive.server2.thrift.http.port}}' +HIVE_SERVER_INTERACTIVE_TRANSPORT_MODE_KEY = '{{hive-interactive-site/hive.server2.transport.mode}}' +HIVE_SERVER_TRANSPORT_MODE_KEY = '{{hive-site/hive.server2.transport.mode}}' +SECURITY_ENABLED_KEY = '{{cluster-env/security_enabled}}' +HIVE_SERVER2_INTERACTIVE_AUTHENTICATION_KEY = '{{hive-interactive-site/hive.server2.authentication}}' +HIVE_SERVER2_AUTHENTICATION_KEY = '{{hive-site/hive.server2.authentication}}' +HIVE_SERVER_INTERACTIVE_PRINCIPAL_KEY = '{{hive-site/hive.server2.authentication.kerberos.principal}}' +SMOKEUSER_KEYTAB_KEY = '{{cluster-env/smokeuser_keytab}}' +SMOKEUSER_PRINCIPAL_KEY = '{{cluster-env/smokeuser_principal_name}}' +SMOKEUSER_KEY = '{{cluster-env/smokeuser}}' +HIVE_SSL = '{{hive-site/hive.server2.use.SSL}}' +HIVE_SSL_KEYSTORE_PATH = '{{hive-interactive-site/hive.server2.keystore.path}}' +HIVE_SSL_KEYSTORE_PASSWORD = '{{hive-interactive-site/hive.server2.keystore.password}}' +HIVE_LDAP_USERNAME = '{{hive-env/alert_ldap_username}}' +HIVE_LDAP_PASSWORD = '{{hive-env/alert_ldap_password}}' + +# The configured Kerberos executable search paths, if any +KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY = '{{kerberos-env/executable_search_paths}}' + +THRIFT_PORT_DEFAULT = 10500 +HIVE_SERVER_INTERACTIVE_TRANSPORT_MODE_DEFAULT = 'binary' +HIVE_SERVER_INTERACTIVE_PRINCIPAL_DEFAULT = 'hive/_h...@example.com' +HIVE_SERVER2_INTERACTIVE_AUTHENTICATION_DEFAULT = 'NOSASL' + +# default keytab location +SMOKEUSER_KEYTAB_SCRIPT_PARAM_KEY = 'default.smoke.keytab' +SMOKEUSER_KEYTAB_DEFAULT = '/etc/security/keytabs/smokeuser.headless.keytab' + +# default smoke principal +SMOKEUSER_PRINCIPAL_SCRIPT_PARAM_KEY = 'default.smoke.principal' +SMOKEUSER_PRINCIPAL_DEFAULT = 'ambari...@example.com' + +# default smoke user +SMOKEUSER_SCRIPT_PARAM_KEY = 'default.smoke.user' +SMOKEUSER_DEFAULT = 'ambari-qa' + +HADOOPUSER_KEY = '{{cluster-env/hadoop.user.name}}' +HADOOPUSER_DEFAULT = 'hadoop' + +CHECK_COMMAND_TIMEOUT_KEY = 'check.command.timeout' +CHECK_COMMAND_TIMEOUT_DEFAULT = 60.0 + +logger = logging.getLogger('ambari_alerts') + +@OsFamilyFuncImpl(os_family=OsFamilyImpl.DEFAULT) +def get_tokens(): + """ + Returns a tuple of tokens in the format {{site/property}} that will be used + to build the dictionary passed into execute + """ + return (HIVE_SERVER_INTERACTIVE_THRIFT_PORT_KEY, SECURITY_ENABLED_KEY, SMOKEUSER_KEY, + HIVE_SERVER2_INTERACTIVE_AUTHENTICATION_KEY, HIVE_SERVER2_AUTHENTICATION_KEY, + HIVE_SERVER_INTERACTIVE_PRINCIPAL_KEY, SMOKEUSER_KEYTAB_KEY, SMOKEUSER_PRINCIPAL_KEY, + HIVE_SERVER_INTERACTIVE_THRIFT_HTTP_PORT_KEY, HIVE_SERVER_INTERACTIVE_TRANSPORT_MODE_KEY, + HIVE_SERVER_TRANSPORT_MODE_KEY, KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY, HIVE_SSL, + HIVE_SSL_KEYSTORE_PATH, HIVE_SSL_KEYSTORE_PASSWORD, HIVE_LDAP_USERNAME, HIVE_LDAP_PASSWORD) + + +@OsFamilyFuncImpl(os_family=OSConst.WINSRV_FAMILY) +def 
get_tokens(): + pass + +@OsFamilyFuncImpl(os_family=OsFamilyImpl.DEFAULT) +def execute(configurations={}, parameters={}, host_name=None): + """ + Returns a tuple containing the result code and a pre-formatted result label + + Keyword arguments: + configurations (dictionary): a mapping of configuration key to value + parameters (dictionary): a mapping of script parameter key to value + host_name (string): the name of this host where the alert is running + """ + + if configurations is None: + return ('UNKNOWN', ['There were no configurations supplied to the script.']) + + transport_mode = HIVE_SERVER_INTERACTIVE_TRANSPORT_MODE_DEFAULT + if HIVE_SERVER_INTERACTIVE_TRANSPORT_MODE_KEY in configurations: + transport_mode = configurations[HIVE_SERVER_INTERACTIVE_TRANSPORT_MODE_KEY] + elif HIVE_SERVER_TRANSPORT_MODE_KEY in configurations: + transport_mode = configurations[HIVE_SERVER_TRANSPORT_MODE_KEY] + + port = THRIFT_PORT_DEFAULT + if transport_mode.lower() == 'binary' and HIVE_SERVER_INTERACTIVE_THRIFT_PORT_KEY in configurations: + port = int(configurations[HIVE_SERVER_INTERACTIVE_THRIFT_PORT_KEY]) + elif transport_mode.lower() == 'http' and HIVE_SERVER_INTERACTIVE_THRIFT_HTTP_PORT_KEY in configurations: + port = int(configurations[HIVE_SERVER_INTERACTIVE_THRIFT_HTTP_PORT_KEY]) + + security_enabled = False + if SECURITY_ENABLED_KEY in configurations: + security_enabled = str(configurations[SECURITY_ENABLED_KEY]).upper() == 'TRUE' + + check_command_timeout = CHECK_COMMAND_TIMEOUT_DEFAULT + if CHECK_COMMAND_TIMEOUT_KEY in parameters: + check_command_timeout = float(parameters[CHECK_COMMAND_TIMEOUT_KEY]) + + hive_server2_authentication = HIVE_SERVER2_INTERACTIVE_AUTHENTICATION_DEFAULT + if HIVE_SERVER2_INTERACTIVE_AUTHENTICATION_KEY in configurations: + hive_server2_authentication = configurations[HIVE_SERVER2_INTERACTIVE_AUTHENTICATION_KEY] + elif HIVE_SERVER2_AUTHENTICATION_KEY in configurations: + hive_server2_authentication = configurations[HIVE_SERVER2_AUTHENTICATION_KEY] + + hive_ssl = False + if HIVE_SSL in configurations: + hive_ssl = configurations[HIVE_SSL] + + hive_ssl_keystore_path = None + if HIVE_SSL_KEYSTORE_PATH in configurations: + hive_ssl_keystore_path = configurations[HIVE_SSL_KEYSTORE_PATH] + + hive_ssl_keystore_password = None + if HIVE_SSL_KEYSTORE_PASSWORD in configurations: + hive_ssl_keystore_password = configurations[HIVE_SSL_KEYSTORE_PASSWORD] + + # defaults + smokeuser_keytab = SMOKEUSER_KEYTAB_DEFAULT + smokeuser_principal = SMOKEUSER_PRINCIPAL_DEFAULT + smokeuser = SMOKEUSER_DEFAULT + + # check script params + if SMOKEUSER_PRINCIPAL_SCRIPT_PARAM_KEY in parameters: + smokeuser_principal = parameters[SMOKEUSER_PRINCIPAL_SCRIPT_PARAM_KEY] + + if SMOKEUSER_SCRIPT_PARAM_KEY in parameters: + smokeuser = parameters[SMOKEUSER_SCRIPT_PARAM_KEY] + + if SMOKEUSER_KEYTAB_SCRIPT_PARAM_KEY in parameters: + smokeuser_keytab = parameters[SMOKEUSER_KEYTAB_SCRIPT_PARAM_KEY] + + + # check configurations last as they should always take precedence + if SMOKEUSER_PRINCIPAL_KEY in configurations: + smokeuser_principal = configurations[SMOKEUSER_PRINCIPAL_KEY] + + if SMOKEUSER_KEY in configurations: + smokeuser = configurations[SMOKEUSER_KEY] + + ldap_username = "" + ldap_password = "" + if HIVE_LDAP_USERNAME in configurations: + ldap_username = configurations[HIVE_LDAP_USERNAME] + if HIVE_LDAP_PASSWORD in configurations: + ldap_password = configurations[HIVE_LDAP_PASSWORD] + + result_code = None + + if security_enabled: + hive_server_principal = HIVE_SERVER_INTERACTIVE_PRINCIPAL_DEFAULT 
+ if HIVE_SERVER_INTERACTIVE_PRINCIPAL_KEY in configurations: + hive_server_principal = configurations[HIVE_SERVER_INTERACTIVE_PRINCIPAL_KEY] + + if SMOKEUSER_KEYTAB_KEY in configurations: + smokeuser_keytab = configurations[SMOKEUSER_KEYTAB_KEY] + + # Get the configured Kerberos executable search paths, if any + if KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY in configurations: + kerberos_executable_search_paths = configurations[KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY] + else: + kerberos_executable_search_paths = None + + kinit_path_local = get_kinit_path(kerberos_executable_search_paths) + kinitcmd=format("{kinit_path_local} -kt {smokeuser_keytab} {smokeuser_principal}; ") + else: + hive_server_principal = None + kinitcmd=None + + try: + if host_name is None: + host_name = socket.getfqdn() + + start_time = time.time() + + try: + hive_check.check_thrift_port_sasl(host_name, port, hive_server2_authentication, hive_server_principal, + kinitcmd, smokeuser, transport_mode=transport_mode, ssl=hive_ssl, + ssl_keystore=hive_ssl_keystore_path, ssl_password=hive_ssl_keystore_password, + check_command_timeout=int(check_command_timeout), ldap_username=ldap_username, + ldap_password=ldap_password) + result_code = 'OK' + total_time = time.time() - start_time + label = OK_MESSAGE.format(total_time, port) + except: + result_code = 'CRITICAL' + label = CRITICAL_MESSAGE.format(host_name, port, traceback.format_exc()) + + except: + label = traceback.format_exc() + result_code = 'UNKNOWN' + + return (result_code, [label]) + + +@OsFamilyFuncImpl(os_family=OSConst.WINSRV_FAMILY) +def execute(configurations={}, parameters={}, host_name=None): + pass \ No newline at end of file http://git-wip-us.apache.org/repos/asf/ambari/blob/ab1d01bd/ambari-server/src/main/resources/stacks/BigInsights/4.2.5/services/HIVE/package/alerts/alert_hive_metastore.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/BigInsights/4.2.5/services/HIVE/package/alerts/alert_hive_metastore.py b/ambari-server/src/main/resources/stacks/BigInsights/4.2.5/services/HIVE/package/alerts/alert_hive_metastore.py new file mode 100644 index 0000000..5b4fd68 --- /dev/null +++ b/ambari-server/src/main/resources/stacks/BigInsights/4.2.5/services/HIVE/package/alerts/alert_hive_metastore.py @@ -0,0 +1,276 @@ +#!/usr/bin/env python + +""" +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" + +import os +import socket +import time +import traceback +import logging + +from resource_management.core import global_lock +from resource_management.libraries.functions import format +from resource_management.libraries.functions import get_kinit_path +from resource_management.libraries.functions import stack_tools +from resource_management.core.resources import Execute +from resource_management.core.signal_utils import TerminateStrategy +from ambari_commons.os_check import OSConst +from ambari_commons.os_family_impl import OsFamilyFuncImpl, OsFamilyImpl + + +OK_MESSAGE = "Metastore OK - Hive command took {0:.3f}s" +CRITICAL_MESSAGE = "Metastore on {0} failed ({1})" +SECURITY_ENABLED_KEY = '{{cluster-env/security_enabled}}' +SMOKEUSER_KEYTAB_KEY = '{{cluster-env/smokeuser_keytab}}' +SMOKEUSER_PRINCIPAL_KEY = '{{cluster-env/smokeuser_principal_name}}' +SMOKEUSER_KEY = '{{cluster-env/smokeuser}}' +HIVE_METASTORE_URIS_KEY = '{{hive-site/hive.metastore.uris}}' + +# The configured Kerberos executable search paths, if any +KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY = '{{kerberos-env/executable_search_paths}}' + +# default keytab location +SMOKEUSER_KEYTAB_SCRIPT_PARAM_KEY = 'default.smoke.keytab' +SMOKEUSER_KEYTAB_DEFAULT = '/etc/security/keytabs/smokeuser.headless.keytab' + +# default smoke principal +SMOKEUSER_PRINCIPAL_SCRIPT_PARAM_KEY = 'default.smoke.principal' +SMOKEUSER_PRINCIPAL_DEFAULT = 'ambari...@example.com' + +# default smoke user +SMOKEUSER_SCRIPT_PARAM_KEY = 'default.smoke.user' +SMOKEUSER_DEFAULT = 'ambari-qa' + +STACK_NAME = '{{cluster-env/stack_name}}' +STACK_ROOT = '{{cluster-env/stack_root}}' + +HIVE_CONF_DIR_LEGACY = '/etc/hive/conf.server' + +HIVE_BIN_DIR_LEGACY = '/usr/lib/hive/bin' + +CHECK_COMMAND_TIMEOUT_KEY = 'check.command.timeout' +CHECK_COMMAND_TIMEOUT_DEFAULT = 60.0 + +HADOOPUSER_KEY = '{{cluster-env/hadoop.user.name}}' +HADOOPUSER_DEFAULT = 'hadoop' + +logger = logging.getLogger('ambari_alerts') + +@OsFamilyFuncImpl(os_family=OsFamilyImpl.DEFAULT) +def get_tokens(): + """ + Returns a tuple of tokens in the format {{site/property}} that will be used + to build the dictionary passed into execute + """ + return (SECURITY_ENABLED_KEY,SMOKEUSER_KEYTAB_KEY,SMOKEUSER_PRINCIPAL_KEY, + HIVE_METASTORE_URIS_KEY, SMOKEUSER_KEY, KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY, + STACK_NAME, STACK_ROOT) + +@OsFamilyFuncImpl(os_family=OSConst.WINSRV_FAMILY) +def get_tokens(): + """ + Returns a tuple of tokens in the format {{site/property}} that will be used + to build the dictionary passed into execute + """ + return (HIVE_METASTORE_URIS_KEY, HADOOPUSER_KEY) + +@OsFamilyFuncImpl(os_family=OsFamilyImpl.DEFAULT) +def execute(configurations={}, parameters={}, host_name=None): + """ + Returns a tuple containing the result code and a pre-formatted result label + + Keyword arguments: + configurations (dictionary): a mapping of configuration key to value + parameters (dictionary): a mapping of script parameter key to value + host_name (string): the name of this host where the alert is running + """ + + if configurations is None: + return (('UNKNOWN', ['There were no configurations supplied to the script.'])) + + if not HIVE_METASTORE_URIS_KEY in configurations: + return (('UNKNOWN', ['Hive metastore uris were not supplied to the script.'])) + + metastore_uris = configurations[HIVE_METASTORE_URIS_KEY].split(',') + + security_enabled = False + if SECURITY_ENABLED_KEY in configurations: + security_enabled = str(configurations[SECURITY_ENABLED_KEY]).upper() == 'TRUE' + + check_command_timeout = 
CHECK_COMMAND_TIMEOUT_DEFAULT + if CHECK_COMMAND_TIMEOUT_KEY in parameters: + check_command_timeout = float(parameters[CHECK_COMMAND_TIMEOUT_KEY]) + + # defaults + smokeuser_keytab = SMOKEUSER_KEYTAB_DEFAULT + smokeuser_principal = SMOKEUSER_PRINCIPAL_DEFAULT + smokeuser = SMOKEUSER_DEFAULT + + # check script params + if SMOKEUSER_PRINCIPAL_SCRIPT_PARAM_KEY in parameters: + smokeuser_principal = parameters[SMOKEUSER_PRINCIPAL_SCRIPT_PARAM_KEY] + + if SMOKEUSER_SCRIPT_PARAM_KEY in parameters: + smokeuser = parameters[SMOKEUSER_SCRIPT_PARAM_KEY] + + if SMOKEUSER_KEYTAB_SCRIPT_PARAM_KEY in parameters: + smokeuser_keytab = parameters[SMOKEUSER_KEYTAB_SCRIPT_PARAM_KEY] + + + # check configurations last as they should always take precedence + if SMOKEUSER_PRINCIPAL_KEY in configurations: + smokeuser_principal = configurations[SMOKEUSER_PRINCIPAL_KEY] + + if SMOKEUSER_KEY in configurations: + smokeuser = configurations[SMOKEUSER_KEY] + + result_code = None + + try: + if security_enabled: + if SMOKEUSER_KEYTAB_KEY in configurations: + smokeuser_keytab = configurations[SMOKEUSER_KEYTAB_KEY] + + # Get the configured Kerberos executable search paths, if any + if KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY in configurations: + kerberos_executable_search_paths = configurations[KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY] + else: + kerberos_executable_search_paths = None + + kinit_path_local = get_kinit_path(kerberos_executable_search_paths) + kinitcmd=format("{kinit_path_local} -kt {smokeuser_keytab} {smokeuser_principal}; ") + + # prevent concurrent kinit + kinit_lock = global_lock.get_lock(global_lock.LOCK_TYPE_KERBEROS) + kinit_lock.acquire() + try: + Execute(kinitcmd, user=smokeuser, + path=["/bin/", "/usr/bin/", "/usr/lib/hive/bin/", "/usr/sbin/"], + timeout=10) + finally: + kinit_lock.release() + + if host_name is None: + host_name = socket.getfqdn() + + for uri in metastore_uris: + if host_name in uri: + metastore_uri = uri + + conf_dir = HIVE_CONF_DIR_LEGACY + bin_dir = HIVE_BIN_DIR_LEGACY + + + if STACK_NAME in configurations and STACK_ROOT in configurations: + stack_root = stack_tools.get_stack_root(configurations[STACK_NAME], configurations[STACK_ROOT]) + hive_conf_dir = stack_root + format("/current/hive-metastore/conf") + hive_bin_dir = stack_root + format("/current/hive-metastore/bin") + + if os.path.exists(hive_conf_dir): + conf_dir = hive_conf_dir + bin_dir = hive_bin_dir + + cmd = format("export HIVE_CONF_DIR='{conf_dir}' ; " + "hive --hiveconf hive.metastore.uris={metastore_uri}\ + --hiveconf hive.metastore.client.connect.retry.delay=1\ + --hiveconf hive.metastore.failure.retries=1\ + --hiveconf hive.metastore.connect.retries=1\ + --hiveconf hive.metastore.client.socket.timeout=14\ + --hiveconf hive.execution.engine=mr -e 'show databases;'") + + start_time = time.time() + + try: + Execute(cmd, user=smokeuser, + path=["/bin/", "/usr/bin/", "/usr/sbin/", bin_dir], + timeout=int(check_command_timeout), + timeout_kill_strategy=TerminateStrategy.KILL_PROCESS_TREE, + ) + + total_time = time.time() - start_time + + result_code = 'OK' + label = OK_MESSAGE.format(total_time) + except: + result_code = 'CRITICAL' + label = CRITICAL_MESSAGE.format(host_name, traceback.format_exc()) + + except: + label = traceback.format_exc() + result_code = 'UNKNOWN' + + return ((result_code, [label])) + +@OsFamilyFuncImpl(os_family=OSConst.WINSRV_FAMILY) +def execute(configurations={}, parameters={}, host_name=None): + """ + Returns a tuple containing the result code and a pre-formatted result label + + Keyword arguments: + 
configurations (dictionary): a mapping of configuration key to value + parameters (dictionary): a mapping of script parameter key to value + host_name (string): the name of this host where the alert is running + """ + + from resource_management.libraries.functions import reload_windows_env + reload_windows_env() + hive_home = os.environ['HIVE_HOME'] + + if configurations is None: + return (('UNKNOWN', ['There were no configurations supplied to the script.'])) + if not HIVE_METASTORE_URIS_KEY in configurations: + return (('UNKNOWN', ['Hive metastore uris were not supplied to the script.'])) + + metastore_uris = configurations[HIVE_METASTORE_URIS_KEY].split(',') + + # defaults + hiveuser = HADOOPUSER_DEFAULT + + if HADOOPUSER_KEY in configurations: + hiveuser = configurations[HADOOPUSER_KEY] + + result_code = None + try: + if host_name is None: + host_name = socket.getfqdn() + for uri in metastore_uris: + if host_name in uri: + metastore_uri = uri + + hive_cmd = os.path.join(hive_home, "bin", "hive.cmd") + cmd = format("cmd /c {hive_cmd} --hiveconf hive.metastore.uris={metastore_uri}\ + --hiveconf hive.metastore.client.connect.retry.delay=1\ + --hiveconf hive.metastore.failure.retries=1\ + --hiveconf hive.metastore.connect.retries=1\ + --hiveconf hive.metastore.client.socket.timeout=14\ + --hiveconf hive.execution.engine=mr -e 'show databases;'") + start_time = time.time() + try: + Execute(cmd, user=hiveuser, timeout=30) + total_time = time.time() - start_time + result_code = 'OK' + label = OK_MESSAGE.format(total_time) + except: + result_code = 'CRITICAL' + label = CRITICAL_MESSAGE.format(host_name, traceback.format_exc()) + except: + label = traceback.format_exc() + result_code = 'UNKNOWN' + + return ((result_code, [label])) http://git-wip-us.apache.org/repos/asf/ambari/blob/ab1d01bd/ambari-server/src/main/resources/stacks/BigInsights/4.2.5/services/HIVE/package/alerts/alert_hive_thrift_port.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/BigInsights/4.2.5/services/HIVE/package/alerts/alert_hive_thrift_port.py b/ambari-server/src/main/resources/stacks/BigInsights/4.2.5/services/HIVE/package/alerts/alert_hive_thrift_port.py new file mode 100644 index 0000000..6db92b0 --- /dev/null +++ b/ambari-server/src/main/resources/stacks/BigInsights/4.2.5/services/HIVE/package/alerts/alert_hive_thrift_port.py @@ -0,0 +1,286 @@ +#!/usr/bin/env python + +""" +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" + +import os +import socket +import time +import logging +import traceback +from resource_management.libraries.functions import hive_check +from resource_management.libraries.functions import format +from resource_management.libraries.functions import get_kinit_path +from ambari_commons.os_check import OSConst +from ambari_commons.os_family_impl import OsFamilyFuncImpl, OsFamilyImpl +from resource_management.core.signal_utils import TerminateStrategy + +OK_MESSAGE = "TCP OK - {0:.3f}s response on port {1}" +CRITICAL_MESSAGE = "Connection failed on host {0}:{1} ({2})" + +HIVE_SERVER_THRIFT_PORT_KEY = '{{hive-site/hive.server2.thrift.port}}' +HIVE_SERVER_THRIFT_HTTP_PORT_KEY = '{{hive-site/hive.server2.thrift.http.port}}' +HIVE_SERVER_TRANSPORT_MODE_KEY = '{{hive-site/hive.server2.transport.mode}}' +SECURITY_ENABLED_KEY = '{{cluster-env/security_enabled}}' +HIVE_SERVER2_AUTHENTICATION_KEY = '{{hive-site/hive.server2.authentication}}' +HIVE_SERVER_PRINCIPAL_KEY = '{{hive-site/hive.server2.authentication.kerberos.principal}}' +SMOKEUSER_KEYTAB_KEY = '{{cluster-env/smokeuser_keytab}}' +SMOKEUSER_PRINCIPAL_KEY = '{{cluster-env/smokeuser_principal_name}}' +SMOKEUSER_KEY = '{{cluster-env/smokeuser}}' +HIVE_SSL = '{{hive-site/hive.server2.use.SSL}}' +HIVE_SSL_KEYSTORE_PATH = '{{hive-site/hive.server2.keystore.path}}' +HIVE_SSL_KEYSTORE_PASSWORD = '{{hive-site/hive.server2.keystore.password}}' +HIVE_LDAP_USERNAME = '{{hive-env/alert_ldap_username}}' +HIVE_LDAP_PASSWORD = '{{hive-env/alert_ldap_password}}' + + +# The configured Kerberos executable search paths, if any +KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY = '{{kerberos-env/executable_search_paths}}' + +THRIFT_PORT_DEFAULT = 10000 +HIVE_SERVER_TRANSPORT_MODE_DEFAULT = 'binary' +HIVE_SERVER_PRINCIPAL_DEFAULT = 'hive/_h...@example.com' +HIVE_SERVER2_AUTHENTICATION_DEFAULT = 'NOSASL' + +# default keytab location +SMOKEUSER_KEYTAB_SCRIPT_PARAM_KEY = 'default.smoke.keytab' +SMOKEUSER_KEYTAB_DEFAULT = '/etc/security/keytabs/smokeuser.headless.keytab' + +# default smoke principal +SMOKEUSER_PRINCIPAL_SCRIPT_PARAM_KEY = 'default.smoke.principal' +SMOKEUSER_PRINCIPAL_DEFAULT = 'ambari...@example.com' + +# default smoke user +SMOKEUSER_SCRIPT_PARAM_KEY = 'default.smoke.user' +SMOKEUSER_DEFAULT = 'ambari-qa' + +HADOOPUSER_KEY = '{{cluster-env/hadoop.user.name}}' +HADOOPUSER_DEFAULT = 'hadoop' + +CHECK_COMMAND_TIMEOUT_KEY = 'check.command.timeout' +CHECK_COMMAND_TIMEOUT_DEFAULT = 60.0 + +logger = logging.getLogger('ambari_alerts') + +@OsFamilyFuncImpl(os_family=OsFamilyImpl.DEFAULT) +def get_tokens(): + """ + Returns a tuple of tokens in the format {{site/property}} that will be used + to build the dictionary passed into execute + """ + return (HIVE_SERVER_THRIFT_PORT_KEY, SECURITY_ENABLED_KEY, SMOKEUSER_KEY, + HIVE_SERVER2_AUTHENTICATION_KEY, HIVE_SERVER_PRINCIPAL_KEY, + SMOKEUSER_KEYTAB_KEY, SMOKEUSER_PRINCIPAL_KEY, HIVE_SERVER_THRIFT_HTTP_PORT_KEY, + HIVE_SERVER_TRANSPORT_MODE_KEY, KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY, HIVE_SSL, + HIVE_SSL_KEYSTORE_PATH, HIVE_SSL_KEYSTORE_PASSWORD, HIVE_LDAP_USERNAME, HIVE_LDAP_PASSWORD) + +@OsFamilyFuncImpl(os_family=OSConst.WINSRV_FAMILY) +def get_tokens(): + """ + Returns a tuple of tokens in the format {{site/property}} that will be used + to build the dictionary passed into execute + """ + return (HIVE_SERVER_THRIFT_PORT_KEY, HIVE_SERVER_THRIFT_HTTP_PORT_KEY, + HIVE_SERVER_TRANSPORT_MODE_KEY, HADOOPUSER_KEY) + +@OsFamilyFuncImpl(os_family=OsFamilyImpl.DEFAULT) +def execute(configurations={}, parameters={}, 
host_name=None): + """ + Returns a tuple containing the result code and a pre-formatted result label + + Keyword arguments: + configurations (dictionary): a mapping of configuration key to value + parameters (dictionary): a mapping of script parameter key to value + host_name (string): the name of this host where the alert is running + """ + + if configurations is None: + return ('UNKNOWN', ['There were no configurations supplied to the script.']) + + transport_mode = HIVE_SERVER_TRANSPORT_MODE_DEFAULT + if HIVE_SERVER_TRANSPORT_MODE_KEY in configurations: + transport_mode = configurations[HIVE_SERVER_TRANSPORT_MODE_KEY] + + port = THRIFT_PORT_DEFAULT + if transport_mode.lower() == 'binary' and HIVE_SERVER_THRIFT_PORT_KEY in configurations: + port = int(configurations[HIVE_SERVER_THRIFT_PORT_KEY]) + elif transport_mode.lower() == 'http' and HIVE_SERVER_THRIFT_HTTP_PORT_KEY in configurations: + port = int(configurations[HIVE_SERVER_THRIFT_HTTP_PORT_KEY]) + + security_enabled = False + if SECURITY_ENABLED_KEY in configurations: + security_enabled = str(configurations[SECURITY_ENABLED_KEY]).upper() == 'TRUE' + + check_command_timeout = CHECK_COMMAND_TIMEOUT_DEFAULT + if CHECK_COMMAND_TIMEOUT_KEY in parameters: + check_command_timeout = float(parameters[CHECK_COMMAND_TIMEOUT_KEY]) + + hive_server2_authentication = HIVE_SERVER2_AUTHENTICATION_DEFAULT + if HIVE_SERVER2_AUTHENTICATION_KEY in configurations: + hive_server2_authentication = configurations[HIVE_SERVER2_AUTHENTICATION_KEY] + + hive_ssl = False + if HIVE_SSL in configurations: + hive_ssl = configurations[HIVE_SSL] + + hive_ssl_keystore_path = None + if HIVE_SSL_KEYSTORE_PATH in configurations: + hive_ssl_keystore_path = configurations[HIVE_SSL_KEYSTORE_PATH] + + hive_ssl_keystore_password = None + if HIVE_SSL_KEYSTORE_PASSWORD in configurations: + hive_ssl_keystore_password = configurations[HIVE_SSL_KEYSTORE_PASSWORD] + + # defaults + smokeuser_keytab = SMOKEUSER_KEYTAB_DEFAULT + smokeuser_principal = SMOKEUSER_PRINCIPAL_DEFAULT + smokeuser = SMOKEUSER_DEFAULT + + # check script params + if SMOKEUSER_PRINCIPAL_SCRIPT_PARAM_KEY in parameters: + smokeuser_principal = parameters[SMOKEUSER_PRINCIPAL_SCRIPT_PARAM_KEY] + + if SMOKEUSER_SCRIPT_PARAM_KEY in parameters: + smokeuser = parameters[SMOKEUSER_SCRIPT_PARAM_KEY] + + if SMOKEUSER_KEYTAB_SCRIPT_PARAM_KEY in parameters: + smokeuser_keytab = parameters[SMOKEUSER_KEYTAB_SCRIPT_PARAM_KEY] + + + # check configurations last as they should always take precedence + if SMOKEUSER_PRINCIPAL_KEY in configurations: + smokeuser_principal = configurations[SMOKEUSER_PRINCIPAL_KEY] + + if SMOKEUSER_KEY in configurations: + smokeuser = configurations[SMOKEUSER_KEY] + + ldap_username = "" + ldap_password = "" + if HIVE_LDAP_USERNAME in configurations: + ldap_username = configurations[HIVE_LDAP_USERNAME] + if HIVE_LDAP_PASSWORD in configurations: + ldap_password = configurations[HIVE_LDAP_PASSWORD] + + result_code = None + + if security_enabled: + hive_server_principal = HIVE_SERVER_PRINCIPAL_DEFAULT + if HIVE_SERVER_PRINCIPAL_KEY in configurations: + hive_server_principal = configurations[HIVE_SERVER_PRINCIPAL_KEY] + + if SMOKEUSER_KEYTAB_KEY in configurations: + smokeuser_keytab = configurations[SMOKEUSER_KEYTAB_KEY] + + # Get the configured Kerberos executable search paths, if any + if KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY in configurations: + kerberos_executable_search_paths = configurations[KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY] + else: + kerberos_executable_search_paths = None + + kinit_path_local = 
get_kinit_path(kerberos_executable_search_paths) + kinitcmd=format("{kinit_path_local} -kt {smokeuser_keytab} {smokeuser_principal}; ") + else: + hive_server_principal = None + kinitcmd=None + + try: + if host_name is None: + host_name = socket.getfqdn() + + start_time = time.time() + + try: + hive_check.check_thrift_port_sasl(host_name, port, hive_server2_authentication, hive_server_principal, + kinitcmd, smokeuser, transport_mode=transport_mode, ssl=hive_ssl, + ssl_keystore=hive_ssl_keystore_path, ssl_password=hive_ssl_keystore_password, + check_command_timeout=int(check_command_timeout),ldap_username=ldap_username, + ldap_password=ldap_password) + result_code = 'OK' + total_time = time.time() - start_time + label = OK_MESSAGE.format(total_time, port) + except: + result_code = 'CRITICAL' + label = CRITICAL_MESSAGE.format(host_name, port, traceback.format_exc()) + + except: + label = traceback.format_exc() + result_code = 'UNKNOWN' + + return (result_code, [label]) + + +@OsFamilyFuncImpl(os_family=OSConst.WINSRV_FAMILY) +def execute(configurations={}, parameters={}, host_name=None): + """ + Returns a tuple containing the result code and a pre-formatted result label + + Keyword arguments: + configurations (dictionary): a mapping of configuration key to value + parameters (dictionary): a mapping of script parameter key to value + host_name (string): the name of this host where the alert is running + """ + + from resource_management.libraries.functions import reload_windows_env + from resource_management.core.resources import Execute + reload_windows_env() + hive_home = os.environ['HIVE_HOME'] + + if configurations is None: + return ('UNKNOWN', ['There were no configurations supplied to the script.']) + + transport_mode = HIVE_SERVER_TRANSPORT_MODE_DEFAULT + if HIVE_SERVER_TRANSPORT_MODE_KEY in configurations: + transport_mode = configurations[HIVE_SERVER_TRANSPORT_MODE_KEY] + + port = THRIFT_PORT_DEFAULT + if transport_mode.lower() == 'binary' and HIVE_SERVER_THRIFT_PORT_KEY in configurations: + port = int(configurations[HIVE_SERVER_THRIFT_PORT_KEY]) + elif transport_mode.lower() == 'http' and HIVE_SERVER_THRIFT_HTTP_PORT_KEY in configurations: + port = int(configurations[HIVE_SERVER_THRIFT_HTTP_PORT_KEY]) + + hiveuser = HADOOPUSER_DEFAULT + if HADOOPUSER_KEY in configurations: + hiveuser = configurations[HADOOPUSER_KEY] + + result_code = None + try: + if host_name is None: + host_name = socket.getfqdn() + + beeline_url = ['jdbc:hive2://{host_name}:{port}/', "transportMode={transport_mode}"] + # append url according to used transport + if transport_mode == "http": + beeline_url.append('httpPath=cliservice') + beeline_url_string = format(";".join(beeline_url)) + beeline_cmd = os.path.join(hive_home, "bin", "beeline.cmd") + cmd = format("cmd /c {beeline_cmd} -u {beeline_url_string} -e '' 2>&1 | findstr Connected") + + start_time = time.time() + try: + Execute(cmd, user=hiveuser, timeout=30, timeout_kill_strategy=TerminateStrategy.KILL_PROCESS_TREE) + total_time = time.time() - start_time + result_code = 'OK' + label = OK_MESSAGE.format(total_time, port) + except: + result_code = 'CRITICAL' + label = CRITICAL_MESSAGE.format(host_name, port, traceback.format_exc()) + except: + label = traceback.format_exc() + result_code = 'UNKNOWN' + + return (result_code, [label]) http://git-wip-us.apache.org/repos/asf/ambari/blob/ab1d01bd/ambari-server/src/main/resources/stacks/BigInsights/4.2.5/services/HIVE/package/alerts/alert_llap_app_status.py 
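Before the LLAP alert that follows: both Thrift-port alerts above implement the same contract — execute(configurations, parameters, host_name) returns a (result_code, [label]) tuple. A self-contained stand-in that mimics that shape with a plain TCP probe (purely illustrative; the committed checks go through hive_check.check_thrift_port_sasl and understand SASL, SSL, and LDAP):

import socket
import time
import traceback

OK_MESSAGE = "TCP OK - {0:.3f}s response on port {1}"
CRITICAL_MESSAGE = "Connection failed on host {0}:{1} ({2})"

def execute_port_probe(host_name, port, timeout=5.0):
    # Returns the same (result_code, [label]) shape the alert framework expects.
    start_time = time.time()
    try:
        conn = socket.create_connection((host_name, port), timeout=timeout)
        conn.close()
        return ('OK', [OK_MESSAGE.format(time.time() - start_time, port)])
    except Exception:
        return ('CRITICAL', [CRITICAL_MESSAGE.format(host_name, port, traceback.format_exc())])

print(execute_port_probe(socket.getfqdn(), 10500))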
---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/BigInsights/4.2.5/services/HIVE/package/alerts/alert_llap_app_status.py b/ambari-server/src/main/resources/stacks/BigInsights/4.2.5/services/HIVE/package/alerts/alert_llap_app_status.py new file mode 100644 index 0000000..e46c896 --- /dev/null +++ b/ambari-server/src/main/resources/stacks/BigInsights/4.2.5/services/HIVE/package/alerts/alert_llap_app_status.py @@ -0,0 +1,303 @@ +#!/usr/bin/env python + +""" +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" + +import time +import logging +import traceback +import json +import subprocess + +from resource_management.libraries.functions import format +from resource_management.libraries.functions import get_kinit_path +from resource_management.libraries.functions import stack_tools +from ambari_commons.os_family_impl import OsFamilyFuncImpl, OsFamilyImpl +from resource_management.core import shell +from resource_management.core.resources import Execute +from resource_management.core import global_lock +from resource_management.core.exceptions import Fail +from resource_management.libraries.script.script import Script + +OK_MESSAGE = "The application reported a '{0}' state in {1:.3f}s" +MESSAGE_WITH_STATE_AND_INSTANCES = "The application reported a '{0}' state in {1:.3f}s. [Live: {2}, Desired: {3}]" +CRITICAL_MESSAGE_WITH_STATE = "The application reported a '{0}' state. Check took {1:.3f}s" +CRITICAL_MESSAGE = "Application information could not be retrieved" + +# results codes +CRITICAL_RESULT_CODE = 'CRITICAL' +OK_RESULT_CODE = 'OK' +UKNOWN_STATUS_CODE = 'UNKNOWN' + + +SECURITY_ENABLED_KEY = '{{cluster-env/security_enabled}}' + +HIVE_PRINCIPAL_KEY = '{{hive-interactive-site/hive.llap.daemon.service.principal}}' +HIVE_PRINCIPAL_DEFAULT = 'default.hive.principal' + +HIVE_PRINCIPAL_KEYTAB_KEY = '{{hive-interactive-site/hive.llap.daemon.keytab.file}}' +HIVE_PRINCIPAL_KEYTAB_DEFAULT = 'default.hive.keytab' + +HIVE_AUTHENTICATION_DEFAULT = 'NOSASL' + +HIVE_USER_KEY = '{{hive-env/hive_user}}' +HIVE_USER_DEFAULT = 'default.smoke.user' + +STACK_NAME = '{{cluster-env/stack_name}}' +STACK_ROOT = '{{cluster-env/stack_root}}' +STACK_ROOT_DEFAULT = Script.get_stack_root() + +LLAP_APP_NAME_KEY = '{{hive-interactive-env/llap_app_name}}' +LLAP_APP_NAME_DEFAULT = 'llap0' + +# The configured Kerberos executable search paths, if any +KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY = '{{kerberos-env/executable_search_paths}}' + + +CHECK_COMMAND_TIMEOUT_KEY = 'check.command.timeout' +CHECK_COMMAND_TIMEOUT_DEFAULT = 120.0 + + +# Mapping of LLAP app states to 'user friendly' state names. 
+llap_app_state_dict = {'RUNNING_ALL': 'RUNNING', + 'RUNNING_PARTIAL': 'RUNNING', + 'COMPLETE': 'NOT RUNNING', + 'LAUNCHING': 'LAUNCHING', + 'APP_NOT_FOUND': 'APP NOT FOUND'} + +logger = logging.getLogger('ambari_alerts') + +@OsFamilyFuncImpl(os_family=OsFamilyImpl.DEFAULT) +def get_tokens(): + """ + Returns a tuple of tokens in the format {{site/property}} that will be used + to build the dictionary passed into execute + """ + return (SECURITY_ENABLED_KEY, KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY, HIVE_PRINCIPAL_KEY, HIVE_PRINCIPAL_KEYTAB_KEY, + HIVE_USER_KEY, STACK_NAME, STACK_ROOT, LLAP_APP_NAME_KEY) + + +@OsFamilyFuncImpl(os_family=OsFamilyImpl.DEFAULT) +def execute(configurations={}, parameters={}, host_name=None): + """ + Returns a tuple containing the result code and a pre-formatted result label + + Keyword arguments: + configurations (dictionary): a mapping of configuration key to value + parameters (dictionary): a mapping of script parameter key to value + host_name (string): the name of this host where the alert is running + """ + + LLAP_APP_STATUS_CMD_TIMEOUT = 0 + + if configurations is None: + return ('UNKNOWN', ['There were no configurations supplied to the script.']) + + result_code = None + + try: + security_enabled = False + if SECURITY_ENABLED_KEY in configurations: + security_enabled = str(configurations[SECURITY_ENABLED_KEY]).upper() == 'TRUE' + + check_command_timeout = CHECK_COMMAND_TIMEOUT_DEFAULT + if CHECK_COMMAND_TIMEOUT_KEY in configurations: + check_command_timeout = int(parameters[CHECK_COMMAND_TIMEOUT_KEY]) + + hive_user = HIVE_USER_DEFAULT + if HIVE_USER_KEY in configurations: + hive_user = configurations[HIVE_USER_KEY] + + llap_app_name = LLAP_APP_NAME_DEFAULT + if LLAP_APP_NAME_KEY in configurations: + llap_app_name = configurations[LLAP_APP_NAME_KEY] + + if security_enabled: + if HIVE_PRINCIPAL_KEY in configurations: + llap_principal = configurations[HIVE_PRINCIPAL_KEY] + else: + llap_principal = HIVE_PRINCIPAL_DEFAULT + llap_principal = llap_principal.replace('_HOST',host_name.lower()) + + llap_keytab = HIVE_PRINCIPAL_KEYTAB_DEFAULT + if HIVE_PRINCIPAL_KEYTAB_KEY in configurations: + llap_keytab = configurations[HIVE_PRINCIPAL_KEYTAB_KEY] + + # Get the configured Kerberos executable search paths, if any + if KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY in configurations: + kerberos_executable_search_paths = configurations[KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY] + else: + kerberos_executable_search_paths = None + + kinit_path_local = get_kinit_path(kerberos_executable_search_paths) + kinitcmd=format("{kinit_path_local} -kt {llap_keytab} {llap_principal}; ") + + # prevent concurrent kinit + kinit_lock = global_lock.get_lock(global_lock.LOCK_TYPE_KERBEROS) + kinit_lock.acquire() + try: + Execute(kinitcmd, user=hive_user, + path=["/bin/", "/usr/bin/", "/usr/lib/hive/bin/", "/usr/sbin/"], + timeout=10) + finally: + kinit_lock.release() + + + + start_time = time.time() + if STACK_NAME in configurations and STACK_ROOT in configurations: + stack_root = stack_tools.get_stack_root(configurations[STACK_NAME], + configurations[STACK_ROOT]) + + llap_status_cmd = stack_root + format("/current/hive-server2-hive2/bin/hive --service llapstatus --name {llap_app_name} --findAppTimeout {LLAP_APP_STATUS_CMD_TIMEOUT}") + else: + llap_status_cmd = STACK_ROOT_DEFAULT + format("/current/hive-server2-hive2/bin/hive --service llapstatus --name {llap_app_name} --findAppTimeout {LLAP_APP_STATUS_CMD_TIMEOUT}") + + code, output, error = shell.checked_call(llap_status_cmd, user=hive_user, 
stderr=subprocess.PIPE, + timeout=check_command_timeout, + logoutput=False) + # Call for getting JSON + llap_app_info = make_valid_json(output) + + if llap_app_info is None or 'state' not in llap_app_info: + alert_label = traceback.format_exc() + result_code = UKNOWN_STATUS_CODE + return (result_code, [alert_label]) + + retrieved_llap_app_state = llap_app_info['state'].upper() + if retrieved_llap_app_state in ['RUNNING_ALL']: + result_code = OK_RESULT_CODE + total_time = time.time() - start_time + alert_label = OK_MESSAGE.format(llap_app_state_dict.get(retrieved_llap_app_state, retrieved_llap_app_state), total_time) + elif retrieved_llap_app_state in ['RUNNING_PARTIAL']: + live_instances = 0 + desired_instances = 0 + percentInstancesUp = 0 + percent_desired_instances_to_be_up = 80 + # Get 'live' and 'desired' instances + if 'liveInstances' not in llap_app_info or 'desiredInstances' not in llap_app_info: + result_code = CRITICAL_RESULT_CODE + total_time = time.time() - start_time + alert_label = CRITICAL_MESSAGE_WITH_STATE.format(llap_app_state_dict.get(retrieved_llap_app_state, retrieved_llap_app_state), total_time) + return (result_code, [alert_label]) + + live_instances = llap_app_info['liveInstances'] + desired_instances = llap_app_info['desiredInstances'] + if live_instances < 0 or desired_instances <= 0: + result_code = CRITICAL_RESULT_CODE + total_time = time.time() - start_time + alert_label = CRITICAL_MESSAGE_WITH_STATE.format(llap_app_state_dict.get(retrieved_llap_app_state, retrieved_llap_app_state), total_time) + return (result_code, [alert_label]) + + percentInstancesUp = float(live_instances) / desired_instances * 100 + if percentInstancesUp >= percent_desired_instances_to_be_up: + result_code = OK_RESULT_CODE + total_time = time.time() - start_time + alert_label = MESSAGE_WITH_STATE_AND_INSTANCES.format(llap_app_state_dict.get(retrieved_llap_app_state, retrieved_llap_app_state), + total_time, + llap_app_info['liveInstances'], + llap_app_info['desiredInstances']) + else: + result_code = CRITICAL_RESULT_CODE + total_time = time.time() - start_time + alert_label = MESSAGE_WITH_STATE_AND_INSTANCES.format(llap_app_state_dict.get(retrieved_llap_app_state, retrieved_llap_app_state), + total_time, + llap_app_info['liveInstances'], + llap_app_info['desiredInstances']) + else: + result_code = CRITICAL_RESULT_CODE + total_time = time.time() - start_time + alert_label = CRITICAL_MESSAGE_WITH_STATE.format(llap_app_state_dict.get(retrieved_llap_app_state, retrieved_llap_app_state), total_time) + except: + alert_label = traceback.format_exc() + traceback.format_exc() + result_code = UKNOWN_STATUS_CODE + return (result_code, [alert_label]) + + +""" +Remove extra lines from 'llapstatus' status output (eg: because of MOTD logging) so as to have a valid JSON data to be passed in +to JSON converter. +""" +def make_valid_json(output): + ''' + + Note: It is assumed right now that extra lines will be only at the start and not at the end. + + Sample expected JSON to be passed for 'loads' is either of the form : + + Case 'A': + { + "amInfo" : { + "appName" : "llap0", + "appType" : "org-apache-slider", + "appId" : "APP1", + "containerId" : "container_1466036628595_0010_01_000001", + "hostname" : "hostName", + "amWebUrl" : "http://hostName:port/" + }, + "state" : "LAUNCHING", + .... + "desiredInstances" : 1, + "liveInstances" : 0, + .... + .... 
+  }
+
+  or
+
+  Case 'B':
+  {
+    "state" : "APP_NOT_FOUND"
+  }
+
+  '''
+  splits = output.split("\n")
+
+  len_splits = len(splits)
+  if (len_splits < 3):
+    raise Fail("Malformed JSON data received from 'llapstatus' command. Exiting ....")
+
+  marker_idx = None  # To detect where from to start reading for JSON data
+  for idx, split in enumerate(splits):
+    curr_elem = split.strip()
+    if idx + 2 > len_splits:
+      raise Fail(
+        "Iterated over the received 'llapstatus' command. Couldn't validate the received output for JSON parsing.")
+    next_elem = (splits[(idx + 1)]).strip()
+    if curr_elem == "{":
+      if next_elem == "\"amInfo\" : {" and (splits[len_splits - 1]).strip() == '}':
+        # For Case 'A'
+        marker_idx = idx
+        break;
+      elif idx + 3 == len_splits and next_elem.startswith('"state" : ') and (splits[idx + 2]).strip() == '}':
+        # For Case 'B'
+        marker_idx = idx
+        break;
+
+
+  # Remove extra logging from possible JSON output
+  if marker_idx is None:
+    raise Fail("Couldn't validate the received output for JSON parsing.")
+  else:
+    if marker_idx != 0:
+      del splits[0:marker_idx]
+
+  scanned_output = '\n'.join(splits)
+  llap_app_info = json.loads(scanned_output)
+  return llap_app_info
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/ambari/blob/ab1d01bd/ambari-server/src/main/resources/stacks/BigInsights/4.2.5/services/HIVE/package/alerts/alert_webhcat_server.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/stacks/BigInsights/4.2.5/services/HIVE/package/alerts/alert_webhcat_server.py b/ambari-server/src/main/resources/stacks/BigInsights/4.2.5/services/HIVE/package/alerts/alert_webhcat_server.py
new file mode 100644
index 0000000..c9575c0
--- /dev/null
+++ b/ambari-server/src/main/resources/stacks/BigInsights/4.2.5/services/HIVE/package/alerts/alert_webhcat_server.py
@@ -0,0 +1,228 @@
+#!/usr/bin/env python
+
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import ambari_simplejson as json # simplejson is much faster compared to the Python 2.6 json module and has the same functions set.
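An aside on make_valid_json before the WebHCat alert continues: the helper strips banner noise (for example MOTD lines) from llapstatus output before JSON parsing. A self-contained reduction of that idea, assuming the noise only precedes the JSON object — the committed helper additionally validates the "amInfo" and "state" shapes described in its docstring:

import json

def strip_leading_noise(output):
    # Drop banner lines before the first '{' so json.loads succeeds.
    lines = output.split("\n")
    for idx, line in enumerate(lines):
        if line.strip() == "{":
            return json.loads("\n".join(lines[idx:]))
    raise ValueError("no JSON object found in llapstatus output")

sample = "Welcome banner\n{\n  \"state\" : \"APP_NOT_FOUND\"\n}"
print(strip_leading_noise(sample)["state"])  # APP_NOT_FOUND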
+import socket +import time +import urllib2 +import traceback +import logging + +from resource_management.core.environment import Environment +from resource_management.libraries.functions.curl_krb_request import curl_krb_request +from resource_management.libraries.functions.curl_krb_request import DEFAULT_KERBEROS_KINIT_TIMER_MS +from resource_management.libraries.functions.curl_krb_request import KERBEROS_KINIT_TIMER_PARAMETER + + +RESULT_CODE_OK = "OK" +RESULT_CODE_CRITICAL = "CRITICAL" +RESULT_CODE_UNKNOWN = "UNKNOWN" + +OK_MESSAGE = "WebHCat status was OK ({0:.3f}s response from {1})" +CRITICAL_CONNECTION_MESSAGE = "Connection failed to {0} + \n{1}" +CRITICAL_HTTP_MESSAGE = "HTTP {0} response from {1} \n{2}" +CRITICAL_WEBHCAT_STATUS_MESSAGE = 'WebHCat returned an unexpected status of "{0}"' +CRITICAL_WEBHCAT_UNKNOWN_JSON_MESSAGE = "Unable to determine WebHCat health from unexpected JSON response" + +TEMPLETON_PORT_KEY = '{{webhcat-site/templeton.port}}' +SECURITY_ENABLED_KEY = '{{cluster-env/security_enabled}}' +WEBHCAT_PRINCIPAL_KEY = '{{webhcat-site/templeton.kerberos.principal}}' +WEBHCAT_KEYTAB_KEY = '{{webhcat-site/templeton.kerberos.keytab}}' + +SMOKEUSER_KEYTAB_KEY = '{{cluster-env/smokeuser_keytab}}' +SMOKEUSER_PRINCIPAL_KEY = '{{cluster-env/smokeuser_principal_name}}' +SMOKEUSER_KEY = '{{cluster-env/smokeuser}}' + +# The configured Kerberos executable search paths, if any +KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY = '{{kerberos-env/executable_search_paths}}' + +WEBHCAT_OK_RESPONSE = 'ok' +WEBHCAT_PORT_DEFAULT = 50111 + +CONNECTION_TIMEOUT_KEY = 'connection.timeout' +CONNECTION_TIMEOUT_DEFAULT = 5.0 +CURL_CONNECTION_TIMEOUT_DEFAULT = str(int(CONNECTION_TIMEOUT_DEFAULT)) + +# default keytab location +SMOKEUSER_KEYTAB_SCRIPT_PARAM_KEY = 'default.smoke.keytab' +SMOKEUSER_KEYTAB_DEFAULT = '/etc/security/keytabs/smokeuser.headless.keytab' + +# default smoke principal +SMOKEUSER_PRINCIPAL_SCRIPT_PARAM_KEY = 'default.smoke.principal' +SMOKEUSER_PRINCIPAL_DEFAULT = 'ambari...@example.com' + +# default smoke user +SMOKEUSER_DEFAULT = 'ambari-qa' +logger = logging.getLogger('ambari_alerts') + +def get_tokens(): + """ + Returns a tuple of tokens in the format {{site/property}} that will be used + to build the dictionary passed into execute + """ + return (TEMPLETON_PORT_KEY, SECURITY_ENABLED_KEY, SMOKEUSER_KEYTAB_KEY,SMOKEUSER_PRINCIPAL_KEY, + KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY, SMOKEUSER_KEY) + + +def execute(configurations={}, parameters={}, host_name=None): + """ + Returns a tuple containing the result code and a pre-formatted result label + + Keyword arguments: + configurations (dictionary): a mapping of configuration key to value + parameters (dictionary): a mapping of script parameter key to value + host_name (string): the name of this host where the alert is running + """ + + result_code = RESULT_CODE_UNKNOWN + + if configurations is None: + return (result_code, ['There were no configurations supplied to the script.']) + + webhcat_port = WEBHCAT_PORT_DEFAULT + if TEMPLETON_PORT_KEY in configurations: + webhcat_port = int(configurations[TEMPLETON_PORT_KEY]) + + security_enabled = False + if SECURITY_ENABLED_KEY in configurations: + security_enabled = configurations[SECURITY_ENABLED_KEY].lower() == 'true' + + # parse script arguments + connection_timeout = CONNECTION_TIMEOUT_DEFAULT + curl_connection_timeout = CURL_CONNECTION_TIMEOUT_DEFAULT + if CONNECTION_TIMEOUT_KEY in parameters: + connection_timeout = float(parameters[CONNECTION_TIMEOUT_KEY]) + curl_connection_timeout = 
str(int(connection_timeout)) + + + # the alert will always run on the webhcat host + if host_name is None: + host_name = socket.getfqdn() + + smokeuser = SMOKEUSER_DEFAULT + + if SMOKEUSER_KEY in configurations: + smokeuser = configurations[SMOKEUSER_KEY] + + # webhcat always uses http, never SSL + query_url = "http://{0}:{1}/templeton/v1/status?user.name={2}".format(host_name, webhcat_port, smokeuser) + + # initialize + total_time = 0 + json_response = {} + + if security_enabled: + try: + # defaults + smokeuser_keytab = SMOKEUSER_KEYTAB_DEFAULT + smokeuser_principal = SMOKEUSER_PRINCIPAL_DEFAULT + + # check script params + if SMOKEUSER_PRINCIPAL_SCRIPT_PARAM_KEY in parameters: + smokeuser_principal = parameters[SMOKEUSER_PRINCIPAL_SCRIPT_PARAM_KEY] + if SMOKEUSER_KEYTAB_SCRIPT_PARAM_KEY in parameters: + smokeuser_keytab = parameters[SMOKEUSER_KEYTAB_SCRIPT_PARAM_KEY] + + # check configurations last as they should always take precedence + if SMOKEUSER_PRINCIPAL_KEY in configurations: + smokeuser_principal = configurations[SMOKEUSER_PRINCIPAL_KEY] + if SMOKEUSER_KEYTAB_KEY in configurations: + smokeuser_keytab = configurations[SMOKEUSER_KEYTAB_KEY] + + # Get the configured Kerberos executable search paths, if any + kerberos_executable_search_paths = None + if KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY in configurations: + kerberos_executable_search_paths = configurations[KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY] + + kinit_timer_ms = parameters.get(KERBEROS_KINIT_TIMER_PARAMETER, DEFAULT_KERBEROS_KINIT_TIMER_MS) + + env = Environment.get_instance() + stdout, stderr, time_millis = curl_krb_request(env.tmp_dir, smokeuser_keytab, smokeuser_principal, + query_url, "webhcat_alert_cc_", kerberos_executable_search_paths, True, + "WebHCat Server Status", smokeuser, connection_timeout=curl_connection_timeout, + kinit_timer_ms = kinit_timer_ms) + + # check the response code + response_code = int(stdout) + + # 0 indicates no connection + if response_code == 0: + label = CRITICAL_CONNECTION_MESSAGE.format(query_url, traceback.format_exc()) + return (RESULT_CODE_CRITICAL, [label]) + + # any other response aside from 200 is a problem + if response_code != 200: + label = CRITICAL_HTTP_MESSAGE.format(response_code, query_url, traceback.format_exc()) + return (RESULT_CODE_CRITICAL, [label]) + + # now that we have the http status and it was 200, get the content + stdout, stderr, total_time = curl_krb_request(env.tmp_dir, smokeuser_keytab, smokeuser_principal, + query_url, "webhcat_alert_cc_", kerberos_executable_search_paths, + False, "WebHCat Server Status", smokeuser, connection_timeout=curl_connection_timeout, + kinit_timer_ms = kinit_timer_ms) + + json_response = json.loads(stdout) + except: + return (RESULT_CODE_CRITICAL, [traceback.format_exc()]) + else: + url_response = None + + try: + # execute the query for the JSON that includes WebHCat status + start_time = time.time() + url_response = urllib2.urlopen(query_url, timeout=connection_timeout) + total_time = time.time() - start_time + + json_response = json.loads(url_response.read()) + except urllib2.HTTPError as httpError: + label = CRITICAL_HTTP_MESSAGE.format(httpError.code, query_url, traceback.format_exc()) + return (RESULT_CODE_CRITICAL, [label]) + except: + label = CRITICAL_CONNECTION_MESSAGE.format(query_url, traceback.format_exc()) + return (RESULT_CODE_CRITICAL, [label]) + finally: + if url_response is not None: + try: + url_response.close() + except: + pass + + + # if status is not in the response, we can't do any check; return CRIT + if 'status' not 
in json_response: + return (RESULT_CODE_CRITICAL, [CRITICAL_WEBHCAT_UNKNOWN_JSON_MESSAGE + str(json_response)]) + + + # URL response received, parse it + try: + webhcat_status = json_response['status'] + except: + return (RESULT_CODE_CRITICAL, [CRITICAL_WEBHCAT_UNKNOWN_JSON_MESSAGE + "\n" + traceback.format_exc()]) + + + # proper JSON received, compare against known value + if webhcat_status.lower() == WEBHCAT_OK_RESPONSE: + result_code = RESULT_CODE_OK + label = OK_MESSAGE.format(total_time, query_url) + else: + result_code = RESULT_CODE_CRITICAL + label = CRITICAL_WEBHCAT_STATUS_MESSAGE.format(webhcat_status) + + return (result_code, [label])
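The unsecured branch of this WebHCat check reduces to a plain urllib2 probe of the Templeton status endpoint. A minimal sketch under that assumption (Python 2, to match the alert scripts above; the committed alert layers Kerberos on top via curl_krb_request and produces more detailed error labels):

import json
import time
import urllib2  # Python 2, matching the alert scripts in this commit

def webhcat_status(host_name, port=50111, smokeuser="ambari-qa", timeout=5.0):
    # WebHCat always speaks plain HTTP, so a GET against /templeton/v1/status
    # is enough for the unsecured case.
    query_url = "http://{0}:{1}/templeton/v1/status?user.name={2}".format(host_name, port, smokeuser)
    start_time = time.time()
    response = urllib2.urlopen(query_url, timeout=timeout)
    body = json.loads(response.read())
    total_time = time.time() - start_time
    if body.get("status") == "ok":
        return ("OK", ["WebHCat status was OK ({0:.3f}s response from {1})".format(total_time, query_url)])
    return ("CRITICAL", ['WebHCat returned an unexpected status of "{0}"'.format(body.get("status"))])

# e.g. print(webhcat_status("localhost"))  # requires a running WebHCat server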