AMBARI-18852 HostCleanup.py should be able to stop processes owned by users or Java processes (dili)
Project: http://git-wip-us.apache.org/repos/asf/ambari/repo Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/0e8474fe Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/0e8474fe Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/0e8474fe Branch: refs/heads/branch-dev-patch-upgrade Commit: 0e8474fea1de0ed113b50bb89a18e8534f991db5 Parents: b413d92 Author: Di Li <[email protected]> Authored: Tue Nov 15 16:47:00 2016 -0500 Committer: Di Li <[email protected]> Committed: Tue Nov 15 16:47:00 2016 -0500 ---------------------------------------------------------------------- .../src/main/python/ambari_agent/HostCleanup.py | 87 ++++++++++++++++++-- .../test/python/ambari_agent/TestHostCleanup.py | 18 ++-- 2 files changed, 93 insertions(+), 12 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/ambari/blob/0e8474fe/ambari-agent/src/main/python/ambari_agent/HostCleanup.py ---------------------------------------------------------------------- diff --git a/ambari-agent/src/main/python/ambari_agent/HostCleanup.py b/ambari-agent/src/main/python/ambari_agent/HostCleanup.py index cca79a8..87cd1b0 100644 --- a/ambari-agent/src/main/python/ambari_agent/HostCleanup.py +++ b/ambari-agent/src/main/python/ambari_agent/HostCleanup.py @@ -36,6 +36,8 @@ import shlex import datetime import tempfile import glob +import pwd +import re from AmbariConfig import AmbariConfig from ambari_agent.Constants import AGENT_TMP_DIR from ambari_commons import OSCheck, OSConst @@ -76,6 +78,8 @@ CACHE_FILES_PATTERN = { } PROCESS_SECTION = "processes" PROCESS_KEY = "proc_list" +PROCESS_OWNER_KEY = "proc_owner_list" +PROCESS_IDENTIFIER_KEY = "proc_identifier" ALT_SECTION = "alternatives" ALT_KEYS = ["symlink_list", "target_list"] HADOOP_GROUP = "hadoop" @@ -88,6 +92,7 @@ DIRNAME_PATTERNS = [ # resources that should not be cleaned REPOSITORY_BLACK_LIST = ["ambari.repo"] PACKAGES_BLACK_LIST = ["ambari-server", 
"ambari-agent"] +USER_BLACK_LIST = ["root"] def get_erase_cmd(): if OSCheck.is_redhat_family(): @@ -135,7 +140,10 @@ class HostCleanup: homeDirList = argMap.get(USER_HOMEDIR_SECTION) dirList = argMap.get(DIR_SECTION) repoList = argMap.get(REPO_SECTION) - procList = argMap.get(PROCESS_SECTION) + proc_map = argMap.get(PROCESS_SECTION) + procList = proc_map.get(PROCESS_KEY) + procUserList = proc_map.get(PROCESS_OWNER_KEY) + procIdentifierList = proc_map.get(PROCESS_IDENTIFIER_KEY) alt_map = argMap.get(ALT_SECTION) additionalDirList = self.get_additional_dirs() @@ -144,6 +152,12 @@ class HostCleanup: if procList and not PROCESS_SECTION in SKIP_LIST: logger.info("\n" + "Killing pid's: " + str(procList) + "\n") self.do_kill_processes(procList) + if procIdentifierList and not PROCESS_SECTION in SKIP_LIST: + self.do_kill_processes_by_identifier(procIdentifierList) + if procUserList and not PROCESS_SECTION in SKIP_LIST: + logger.info("\n" + "Killing pids owned by: " + str(procUserList) + "\n") + self.do_kill_processes_by_users(procUserList) + if packageList and not PACKAGE_SECTION in SKIP_LIST: logger.info("Deleting packages: " + str(packageList) + "\n") self.do_erase_packages(packageList) @@ -163,7 +177,7 @@ class HostCleanup: logger.info("\n" + "Deleting repo files: " + str(repoFiles)) self.do_erase_files_silent(repoFiles) if alt_map and not ALT_SECTION in SKIP_LIST: - logger.info("\n" + "Erasing alternatives:" + str(alt_map) + "\n") + logger.info("\n" + "Erasing alternatives: " + str(alt_map) + "\n") self.do_erase_alternatives(alt_map) return 0 @@ -190,11 +204,23 @@ class HostCleanup: propertyMap[PACKAGE_SECTION] = config.get(PACKAGE_SECTION, PACKAGE_KEY).split(',') except: logger.warn("Cannot read package list: " + str(sys.exc_info()[0])) + try: + proc_map = {} if config.has_option(PROCESS_SECTION, PROCESS_KEY): - propertyMap[PROCESS_SECTION] = config.get(PROCESS_SECTION, PROCESS_KEY).split(',') + proc_map[PROCESS_KEY] = config.get(PROCESS_SECTION, 
PROCESS_KEY).split(',') + + if config.has_option(PROCESS_SECTION, PROCESS_OWNER_KEY): + proc_map[PROCESS_OWNER_KEY] = config.get(PROCESS_SECTION, PROCESS_OWNER_KEY).split(',') + + if config.has_option(PROCESS_SECTION, PROCESS_IDENTIFIER_KEY): + proc_map[PROCESS_IDENTIFIER_KEY] = config.get(PROCESS_SECTION, PROCESS_IDENTIFIER_KEY).split(',') + + if proc_map: + propertyMap[PROCESS_SECTION] = proc_map except: - logger.warn("Cannot read process list: " + str(sys.exc_info()[0])) + logger.warn("Cannot read process list: " + str(sys.exc_info())) + try: if config.has_option(USER_SECTION, USER_KEY): propertyMap[USER_SECTION] = config.get(USER_SECTION, USER_KEY).split(',') @@ -317,9 +343,60 @@ class HostCleanup: command = PROC_KILL_CMD.format(pid) (returncode, stdoutdata, stderrdata) = self.run_os_command(command) if returncode != 0: - logger.error("Unable to kill process with pid: " + pid + ", " + stderrdata) + logger.error("Unable to kill process with pid: " + str(pid) + ", " + str(stderrdata)) return 0 + def getProcsByUsers(self, users, pidList): + logger.debug("User list: "+str(users)) + pids = [pid for pid in os.listdir('/proc') if pid.isdigit()] + logger.debug("All pids under /proc: "+str(pids)); + for pid in pids: + logger.debug("Checking " + str(pid)) + try: + with open(os.path.join('/proc', pid, 'status'), 'r') as f: + for line in f: + if line.startswith('Uid:'): + uid = int(line.split()[1]) + user = pwd.getpwuid(uid).pw_name + logger.debug("User: "+user); + if user in users and user not in USER_BLACK_LIST: + logger.info(user + " started process " + str(pid)) + pidList.append(int(pid)) + except: + logger.debug(str(sys.exc_info())) + + def do_kill_processes_by_users(self, userList): + pidList = [] + self.getProcsByUsers(userList, pidList) + logger.info("Killing pids: "+ str(pidList) + " owned by " + str(userList)) + return self.do_kill_processes(pidList) + + def do_kill_processes_by_identifier(self, identifierList): + pidList = [] + cmd = "ps aux" + (returncode, 
stdoutdata, stderrdata) = self.run_os_command(cmd, True) + + if 0 == returncode and stdoutdata: + lines = stdoutdata.split('\n') + for line in lines: + line = line.strip() + for identifier in identifierList: + identifier = identifier.strip() + if identifier in line: + logger.debug("Found " + line + " for " + identifier); + line = re.sub("\s\s+" , " ", line) #replace multi spaces with single space before calling the split + tokens = line.split(' ') + logger.debug(tokens) + logger.debug(len(tokens)) + if len(tokens) > 1: + pid = str(tokens[1]); + pid = pid.strip() + if pid and pid not in pidList: + logger.info("Adding pid: "+str(pid) + " for " + identifier) + pidList.append(pid) + + return self.do_kill_processes(pidList) + def get_files_in_dir(self, dirPath, filemask = None): fileList = [] if dirPath: http://git-wip-us.apache.org/repos/asf/ambari/blob/0e8474fe/ambari-agent/src/test/python/ambari_agent/TestHostCleanup.py ---------------------------------------------------------------------- diff --git a/ambari-agent/src/test/python/ambari_agent/TestHostCleanup.py b/ambari-agent/src/test/python/ambari_agent/TestHostCleanup.py index 165c5f0..9e1c87a 100644 --- a/ambari-agent/src/test/python/ambari_agent/TestHostCleanup.py +++ b/ambari-agent/src/test/python/ambari_agent/TestHostCleanup.py @@ -40,13 +40,16 @@ DIR_SECTION = "directories" DIR_KEY = "dir_list" PROCESS_SECTION = "processes" PROCESS_KEY = "proc_list" +PROCESS_OWNER_KEY = "proc_owner_list" ALT_SECTION = "alternatives" ALT_KEYS = ["symlink_list", "target_list"] ALT_ERASE_CMD = "alternatives --remove {0} {1}" USER_HOMEDIR_SECTION = "usr_homedir" -hostcheck_result_fileContent = """[processes] +hostcheck_result_fileContent = """ +[processes] proc_list = 323,434 +proc_owner_list = abc,efg [users] usr_list = rrdcached,ambari-qa,hive,oozie,hbase,hcat,mysql,mapred,hdfs,zookeeper,sqoop @@ -102,7 +105,8 @@ class TestHostCleanup(TestCase): patch_join_mock.return_value = f2.name propMap = 
self.hostcleanup.read_host_check_file(tmpfile) - self.assertTrue('434' in propMap["processes"]) + self.assertTrue('434' in propMap["processes"]["proc_list"]) + self.assertTrue('abc' in propMap["processes"]["proc_owner_list"]) self.assertTrue("mysql" in propMap["users"]) self.assertTrue("HDP-epel" in propMap["repositories"]) self.assertTrue("/etc/hadoop" in propMap["directories"]) @@ -220,7 +224,7 @@ class TestHostCleanup(TestCase): get_additional_dirs_method.return_value = ['/tmp/hadoop-yarn','/tmp/hsperfdata_007'] propertyMap = {PACKAGE_SECTION:['abcd', 'pqrst'], USER_SECTION:['abcd', 'pqrst'], REPO_SECTION:['abcd', 'pqrst'], DIR_SECTION:['abcd', 'pqrst'], - PROCESS_SECTION:['abcd', 'pqrst'], + PROCESS_SECTION:{PROCESS_KEY:['1234']}, ALT_SECTION:{ALT_KEYS[0]:['alt1','alt2'], ALT_KEYS[1]:[ 'dir1']}, USER_HOMEDIR_SECTION:['decf']} get_os_type_method.return_value = 'redhat' @@ -239,7 +243,7 @@ class TestHostCleanup(TestCase): do_erase_packages_method.assert_called_once_with(['abcd', 'pqrst']) do_erase_files_silent_method.assert_called_once_with(['abcd', 'pqrst']) do_delete_users_method.assert_called_once_with(['abcd', 'pqrst']) - do_kill_processes_method.assert_called_once_with(['abcd', 'pqrst']) + do_kill_processes_method.assert_called_once_with(['1234']) do_erase_alternatives_method.assert_called_once_with({ALT_KEYS[0]:['alt1', 'alt2'], ALT_KEYS[1]:['dir1']}) @@ -270,7 +274,7 @@ class TestHostCleanup(TestCase): sys.stdout = out propertyMap = {PACKAGE_SECTION:['abcd', 'pqrst'], USER_SECTION:['abcd', 'pqrst'], REPO_SECTION:['abcd', 'pqrst'], DIR_SECTION:['abcd', 'pqrst'], - PROCESS_SECTION:['abcd', 'pqrst'], + PROCESS_SECTION:{PROCESS_KEY:['abcd', 'pqrst']}, ALT_SECTION:{ALT_KEYS[0]:['alt1','alt2'], ALT_KEYS[1]:[ 'dir1']}} get_os_type_method.return_value = 'redhat' @@ -311,7 +315,7 @@ class TestHostCleanup(TestCase): sys.stdout = out propertyMap = {PACKAGE_SECTION:['abcd', 'pqrst'], USER_SECTION:['abcd', 'pqrst'], REPO_SECTION:['abcd', 'pqrst'], DIR_SECTION:['abcd', 
'pqrst'], - PROCESS_SECTION:['abcd', 'pqrst']} + PROCESS_SECTION:{PROCESS_KEY:['abcd', 'pqrst']}} get_os_type_method.return_value = 'redhat' find_repo_files_for_repos_method.return_value = ['abcd', 'pqrst'] HostCleanup.SKIP_LIST = [PACKAGE_SECTION, REPO_SECTION] @@ -369,7 +373,7 @@ class TestHostCleanup(TestCase): self.assertTrue(propertyMap.has_key(USER_SECTION)) self.assertTrue(propertyMap.has_key(DIR_SECTION)) self.assertTrue(propertyMap.has_key(PROCESS_SECTION)) - self.assertEquals(propertyMap[PROCESS_SECTION][0], "323") + self.assertEquals(propertyMap[PROCESS_SECTION][PROCESS_KEY][0], "323") sys.stdout = sys.__stdout__
