AMBARI-18151. Oozie Hive actions fail when Atlas is installed since Atlas Hive Hooks need to be copied to Oozie Share Lib in HDFS (alejandro)
Project: http://git-wip-us.apache.org/repos/asf/ambari/repo Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/aee40060 Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/aee40060 Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/aee40060 Branch: refs/heads/branch-dev-logsearch Commit: aee400608f6d111b4c3f8913bc1a01d640a6f409 Parents: a929325 Author: Alejandro Fernandez <afernan...@hortonworks.com> Authored: Wed Aug 17 13:23:48 2016 -0700 Committer: Alejandro Fernandez <afernan...@hortonworks.com> Committed: Wed Aug 17 13:29:16 2016 -0700 ---------------------------------------------------------------------- .../libraries/functions/constants.py | 1 + .../libraries/providers/hdfs_resource.py | 4 +- .../OOZIE/4.0.0.2.0/package/scripts/oozie.py | 142 ++++++++++++++++++- .../4.0.0.2.0/package/scripts/oozie_service.py | 7 + .../4.0.0.2.0/package/scripts/params_linux.py | 20 +++ .../HDP/2.0.6/properties/stack_features.json | 5 + .../services/HIVE/configuration/hive-site.xml | 26 ++++ 7 files changed, 201 insertions(+), 4 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/ambari/blob/aee40060/ambari-common/src/main/python/resource_management/libraries/functions/constants.py ---------------------------------------------------------------------- diff --git a/ambari-common/src/main/python/resource_management/libraries/functions/constants.py b/ambari-common/src/main/python/resource_management/libraries/functions/constants.py index c5cbb3f..9306a43 100644 --- a/ambari-common/src/main/python/resource_management/libraries/functions/constants.py +++ b/ambari-common/src/main/python/resource_management/libraries/functions/constants.py @@ -91,6 +91,7 @@ class StackFeature: ATLAS_RANGER_PLUGIN_SUPPORT = "atlas_ranger_plugin_support" ATLAS_UPGRADE_SUPPORT = "atlas_upgrade_support" ATLAS_CONF_DIR_IN_PATH = "atlas_conf_dir_in_path" + ATLAS_HOOK_SUPPORT = "atlas_hook_support" 
FALCON_ATLAS_SUPPORT_2_3 = "falcon_atlas_support_2_3" FALCON_ATLAS_SUPPORT = "falcon_atlas_support" RANGER_PID_SUPPORT = "ranger_pid_support" http://git-wip-us.apache.org/repos/asf/ambari/blob/aee40060/ambari-common/src/main/python/resource_management/libraries/providers/hdfs_resource.py ---------------------------------------------------------------------- diff --git a/ambari-common/src/main/python/resource_management/libraries/providers/hdfs_resource.py b/ambari-common/src/main/python/resource_management/libraries/providers/hdfs_resource.py index 7abdf5c..ee41195 100644 --- a/ambari-common/src/main/python/resource_management/libraries/providers/hdfs_resource.py +++ b/ambari-common/src/main/python/resource_management/libraries/providers/hdfs_resource.py @@ -333,8 +333,8 @@ class HdfsResourceWebHDFS: def _set_owner(self, file_status=None): owner = "" if not self.main_resource.resource.owner else self.main_resource.resource.owner group = "" if not self.main_resource.resource.group else self.main_resource.resource.group - - if (not owner or file_status and file_status['owner'] == owner) and (not group or file_status and file_status['group'] == group): + + if not self.main_resource.resource.recursive_chown and (not owner or file_status and file_status['owner'] == owner) and (not group or file_status and file_status['group'] == group): return self.util.run_command(self.main_resource.resource.target, 'SETOWNER', method='PUT', owner=owner, group=group, assertable_result=False) http://git-wip-us.apache.org/repos/asf/ambari/blob/aee40060/ambari-server/src/main/resources/common-services/OOZIE/4.0.0.2.0/package/scripts/oozie.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/common-services/OOZIE/4.0.0.2.0/package/scripts/oozie.py b/ambari-server/src/main/resources/common-services/OOZIE/4.0.0.2.0/package/scripts/oozie.py index dd3a692..c408f8d 100644 --- 
a/ambari-server/src/main/resources/common-services/OOZIE/4.0.0.2.0/package/scripts/oozie.py +++ b/ambari-server/src/main/resources/common-services/OOZIE/4.0.0.2.0/package/scripts/oozie.py @@ -17,27 +17,38 @@ See the License for the specific language governing permissions and limitations under the License. """ +# Python Imports import os +import re +# Resource Management Imports from resource_management.core.resources.service import ServiceConfig from resource_management.core.resources.system import Directory, Execute, File from resource_management.core.source import DownloadSource from resource_management.core.source import InlineTemplate from resource_management.core.source import Template from resource_management.libraries.functions.format import format +from resource_management.libraries.functions.default import default from resource_management.libraries.functions import StackFeature +from resource_management.libraries.functions.version import format_stack_version from resource_management.libraries.functions.stack_features import check_stack_feature from resource_management.libraries.functions.oozie_prepare_war import prepare_war from resource_management.libraries.resources.xml_config import XmlConfig from resource_management.libraries.script.script import Script from resource_management.core.resources.packaging import Package -from resource_management.core.shell import as_user -from resource_management.core.shell import as_sudo +from resource_management.core.shell import as_user, as_sudo, call +from resource_management.core.exceptions import Fail + +from resource_management.libraries.functions.setup_atlas_hook import has_atlas_in_cluster, setup_atlas_hook +from ambari_commons.constants import SERVICE, UPGRADE_TYPE_NON_ROLLING, UPGRADE_TYPE_ROLLING +from resource_management.libraries.functions.constants import Direction from ambari_commons.os_family_impl import OsFamilyFuncImpl, OsFamilyImpl from ambari_commons import OSConst from ambari_commons.inet_utils 
import download_file +from resource_management.core import Logger + @OsFamilyFuncImpl(os_family=OSConst.WINSRV_FAMILY) def oozie(is_server=False): @@ -294,12 +305,139 @@ def oozie_server_specific(): group = params.user_group, mode = 0664 ) + + # If Atlas is also installed, need to generate Atlas Hive hook (hive-atlas-application.properties file) in directory + # {stack_root}/{version}/atlas/hook/hive/ + # Because this is a .properties file instead of an xml file, it will not be read automatically by Oozie. + # However, should still save the file on this host so that can upload it to the Oozie Sharelib in DFS. + if has_atlas_in_cluster(): + atlas_hook_filepath = os.path.join(params.hive_conf_dir, params.atlas_hook_filename) + Logger.info("Has atlas in cluster, will save Atlas Hive hook into location %s" % str(atlas_hook_filepath)) + setup_atlas_hook(SERVICE.HIVE, params.hive_atlas_application_properties, atlas_hook_filepath, params.oozie_user, params.user_group) + Directory(params.oozie_server_dir, owner = params.oozie_user, group = params.user_group, recursive_ownership = True, ) +def __parse_sharelib_from_output(output): + """ + Return the parent directory of the first path from the output of the "oozie admin -shareliblist command $comp" + Output will match pattern like: + + Potential errors + [Available ShareLib] + hive + hdfs://server:8020/user/oozie/share/lib/lib_20160811235630/hive/file1.jar + hdfs://server:8020/user/oozie/share/lib/lib_20160811235630/hive/file2.jar + """ + if output is not None: + pattern = re.compile(r"\[Available ShareLib\]\n\S*?\n(.*share.*)", re.IGNORECASE) + m = pattern.search(output) + if m and len(m.groups()) == 1: + jar_path = m.group(1) + # Remove leading/trailing spaces and get the containing directory + sharelib_dir = os.path.dirname(jar_path.strip()) + return sharelib_dir + return None + +def copy_atlas_hive_hook_to_dfs_share_lib(upgrade_type=None, upgrade_direction=None): + """ + If the Atlas Hive Hook directory is present, Atlas 
is installed, and this is the first Oozie Server, + then copy the entire contents of that directory to the Oozie Sharelib in DFS, e.g., + /usr/$stack/$version/atlas/hook/hive/ -> hdfs:///user/oozie/share/lib/lib_$timestamp/hive + + :param upgrade_type: If in the middle of a stack upgrade, the type as UPGRADE_TYPE_ROLLING or UPGRADE_TYPE_NON_ROLLING + :param upgrade_direction: If in the middle of a stack upgrade, the direction as Direction.UPGRADE or Direction.DOWNGRADE. + """ + import params + + # Calculate the effective version since this code can also be called during EU/RU in the upgrade direction. + effective_version = params.stack_version_formatted if upgrade_type is None else format_stack_version(params.version) + if not check_stack_feature(StackFeature.ATLAS_HOOK_SUPPORT, effective_version): + return + + # Important that oozie_server_hostnames is sorted by name so that this only runs on a single Oozie server. + if not (len(params.oozie_server_hostnames) > 0 and params.hostname == params.oozie_server_hostnames[0]): + Logger.debug("Will not attempt to copy Atlas Hive hook to DFS since this is not the first Oozie Server " + "sorted by hostname.") + return + + if not has_atlas_in_cluster(): + Logger.debug("Will not attempt to copy Atlas Hve hook to DFS since Atlas is not installed on the cluster.") + return + + if upgrade_type is not None and upgrade_direction == Direction.DOWNGRADE: + Logger.debug("Will not attempt to copy Atlas Hve hook to DFS since in the middle of Rolling/Express upgrade " + "and performing a Downgrade.") + return + + atlas_hive_hook_dir = format("{stack_root}/{version}/atlas/hook/hive/") + if not os.path.exists(atlas_hive_hook_dir): + Logger.error(format("ERROR. 
Atlas is installed in cluster but this Oozie server doesn't " + "contain directory {atlas_hive_hook_dir}")) + return + + num_files = len([name for name in os.listdir(atlas_hive_hook_dir) if os.path.exists(os.path.join(atlas_hive_hook_dir, name))]) + Logger.info("Found %d files/directories inside Atlas Hive hook directory %s"% (num_files, atlas_hive_hook_dir)) + + # This can return over 100 files, so take the first 5 lines after "Available ShareLib" + command = format(r'source {conf_dir}/oozie-env.sh ; oozie admin -shareliblist hive | grep "\[Available ShareLib\]" -A 5') + + try: + code, out = call(command, user=params.oozie_user, tries=10, try_sleep=5, logoutput=True) + if code == 0 and out is not None: + hive_sharelib_dir = __parse_sharelib_from_output(out) + + if hive_sharelib_dir is None: + raise Fail("Could not parse Hive sharelib from output.") + + Logger.info("Parsed Hive sharelib = %s and will attempt to copy/replace %d files to it from %s" % + (hive_sharelib_dir, num_files, atlas_hive_hook_dir)) + + params.HdfsResource(hive_sharelib_dir, + type="directory", + action="create_on_execute", + source=atlas_hive_hook_dir, + user=params.hdfs_user, + owner=params.oozie_user, + group=params.hdfs_user, + mode=0755, + recursive_chown=True, + recursive_chmod=True, + replace_existing_files=True + ) + + Logger.info("Copying Atlas Hive hook properties file to Oozie Sharelib in DFS.") + atlas_hook_filepath_source = os.path.join(params.hive_conf_dir, params.atlas_hook_filename) + atlas_hook_file_path_dest_in_dfs = os.path.join(hive_sharelib_dir, params.atlas_hook_filename) + params.HdfsResource(atlas_hook_file_path_dest_in_dfs, + type="file", + source=atlas_hook_filepath_source, + action="create_on_execute", + owner=params.oozie_user, + group=params.hdfs_user, + mode=0755, + replace_existing_files=True + ) + params.HdfsResource(None, action="execute") + + # Update the sharelib after making any changes + # Since calling oozie-env.sh, don't have to specify -oozie 
http(s):localhost:{oozie_server_admin_port}/oozie + command = format("source {conf_dir}/oozie-env.sh ; oozie admin -sharelibupdate") + code, out = call(command, user=params.oozie_user, tries=5, try_sleep=5, logoutput=True) + if code == 0 and out is not None: + Logger.info("Successfully updated the Oozie ShareLib") + else: + raise Exception("Could not update the Oozie ShareLib after uploading the Atlas Hive hook directory to DFS. " + "Code: %s" % str(code)) + else: + raise Exception("Code is non-zero or output is empty. Code: %s" % str(code)) + except Fail, e: + Logger.error("Failed to get Hive sharelib directory in DFS. %s" % str(e)) + + def download_database_library_if_needed(target_directory = None): """ Downloads the library to use when connecting to the Oozie database, if http://git-wip-us.apache.org/repos/asf/ambari/blob/aee40060/ambari-server/src/main/resources/common-services/OOZIE/4.0.0.2.0/package/scripts/oozie_service.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/common-services/OOZIE/4.0.0.2.0/package/scripts/oozie_service.py b/ambari-server/src/main/resources/common-services/OOZIE/4.0.0.2.0/package/scripts/oozie_service.py index 2316378..74b639b 100644 --- a/ambari-server/src/main/resources/common-services/OOZIE/4.0.0.2.0/package/scripts/oozie_service.py +++ b/ambari-server/src/main/resources/common-services/OOZIE/4.0.0.2.0/package/scripts/oozie_service.py @@ -17,8 +17,13 @@ See the License for the specific language governing permissions and limitations under the License. 
""" +# Python Imports import os +# Local Imports +from oozie import copy_atlas_hive_hook_to_dfs_share_lib + +# Resource Management Imports from resource_management.core import sudo from resource_management.core.shell import as_user from resource_management.core.logger import Logger @@ -146,6 +151,8 @@ def oozie_service(action = 'start', upgrade_type=None): # start oozie Execute( start_cmd, environment=environment, user = params.oozie_user, not_if = no_op_test ) + + copy_atlas_hive_hook_to_dfs_share_lib(upgrade_type, params.upgrade_direction) except: show_logs(params.oozie_log_dir, params.oozie_user) raise http://git-wip-us.apache.org/repos/asf/ambari/blob/aee40060/ambari-server/src/main/resources/common-services/OOZIE/4.0.0.2.0/package/scripts/params_linux.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/common-services/OOZIE/4.0.0.2.0/package/scripts/params_linux.py b/ambari-server/src/main/resources/common-services/OOZIE/4.0.0.2.0/package/scripts/params_linux.py index 2fc3e8b..80f2cd7 100644 --- a/ambari-server/src/main/resources/common-services/OOZIE/4.0.0.2.0/package/scripts/params_linux.py +++ b/ambari-server/src/main/resources/common-services/OOZIE/4.0.0.2.0/package/scripts/params_linux.py @@ -26,6 +26,7 @@ from resource_management.libraries.functions.default import default from resource_management.libraries.functions import get_kinit_path from resource_management.libraries.functions import get_port_from_url from resource_management.libraries.functions.get_not_managed_resources import get_not_managed_resources +from resource_management.libraries.functions.setup_atlas_hook import has_atlas_in_cluster from resource_management.libraries.script.script import Script from resource_management.libraries.functions.get_lzo_packages import get_lzo_packages from resource_management.libraries.functions.expect import expect @@ -42,6 +43,10 @@ config = Script.get_config() tmp_dir = Script.get_tmp_dir() 
sudo = AMBARI_SUDO_BINARY + +# Needed since this writes out the Atlas Hive Hook config file. +cluster_name = config['clusterName'] + hostname = config["hostname"] # New Cluster Stack Version that is defined during the RESTART of a Rolling Upgrade @@ -276,6 +281,10 @@ ambari_server_hostname = config['clusterHostInfo']['ambari_server_host'][0] falcon_host = default("/clusterHostInfo/falcon_server_hosts", []) has_falcon_host = not len(falcon_host) == 0 +oozie_server_hostnames = default("/clusterHostInfo/oozie_server", []) +oozie_server_hostnames = sorted(oozie_server_hostnames) + + #oozie-log4j.properties if (('oozie-log4j' in config['configurations']) and ('content' in config['configurations']['oozie-log4j'])): log4j_props = config['configurations']['oozie-log4j']['content'] @@ -292,6 +301,17 @@ default_fs = config['configurations']['core-site']['fs.defaultFS'] dfs_type = default("/commandParams/dfs_type", "") + +######################################################## +############# Atlas related params ##################### +######################################################## +#region Atlas Hooks needed by Hive on Oozie +hive_atlas_application_properties = default('/configurations/hive-atlas-application.properties', {}) + +if has_atlas_in_cluster(): + atlas_hook_filename = default('/configurations/atlas-env/metadata_conf_file', 'atlas-application.properties') +#endregion + import functools #create partial functions with common arguments for every HdfsResource call #to create/delete hdfs directory/file/copyfromlocal we need to call params.HdfsResource in code http://git-wip-us.apache.org/repos/asf/ambari/blob/aee40060/ambari-server/src/main/resources/stacks/HDP/2.0.6/properties/stack_features.json ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/HDP/2.0.6/properties/stack_features.json b/ambari-server/src/main/resources/stacks/HDP/2.0.6/properties/stack_features.json index 
ed995b2..a754cd5 100644 --- a/ambari-server/src/main/resources/stacks/HDP/2.0.6/properties/stack_features.json +++ b/ambari-server/src/main/resources/stacks/HDP/2.0.6/properties/stack_features.json @@ -269,6 +269,11 @@ "min_version": "2.5.0.0" }, { + "name": "atlas_hook_support", + "description": "Atlas support for hooks in Hive, Storm, Falcon, and Sqoop", + "min_version": "2.5.0.0" + }, + { "name": "ranger_pid_support", "description": "Ranger Service support pid generation AMBARI-16756", "min_version": "2.5.0.0" http://git-wip-us.apache.org/repos/asf/ambari/blob/aee40060/ambari-server/src/main/resources/stacks/HDP/2.5/services/HIVE/configuration/hive-site.xml ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/HDP/2.5/services/HIVE/configuration/hive-site.xml b/ambari-server/src/main/resources/stacks/HDP/2.5/services/HIVE/configuration/hive-site.xml new file mode 100644 index 0000000..bf1de39 --- /dev/null +++ b/ambari-server/src/main/resources/stacks/HDP/2.5/services/HIVE/configuration/hive-site.xml @@ -0,0 +1,26 @@ +<?xml version="1.0"?> +<?xml-stylesheet type="text/xsl" href="configuration.xsl"?> +<!-- +Licensed to the Apache Software Foundation (ASF) under one or more +contributor license agreements. See the NOTICE file distributed with +this work for additional information regarding copyright ownership. +The ASF licenses this file to You under the Apache License, Version 2.0 +(the "License"); you may not use this file except in compliance with +the License. You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+--> +<configuration supports_final="true"> + <!-- This property is removed in HDP 2.5 and higher. --> + <property> + <name>atlas.rest.address</name> + <value>http://localhost:21000</value> + <deleted>true</deleted> + </property> +</configuration>