Repository: ambari Updated Branches: refs/heads/trunk 1a8a53828 -> 85c9104ec
AMBARI-19003. Perf: Fix deploy-gce-perf-cluster.py to deploy separate server onto own cluster with different settings for more cores and MySQL DB (alejandro) Project: http://git-wip-us.apache.org/repos/asf/ambari/repo Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/85c9104e Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/85c9104e Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/85c9104e Branch: refs/heads/trunk Commit: 85c9104ec14dc7b059d41f61dbbc6e57aec216d0 Parents: 1a8a538 Author: Alejandro Fernandez <afernan...@hortonworks.com> Authored: Tue Nov 22 14:37:07 2016 -0800 Committer: Alejandro Fernandez <afernan...@hortonworks.com> Committed: Tue Nov 29 12:42:58 2016 -0800 ---------------------------------------------------------------------- contrib/utils/perf/deploy-gce-perf-cluster.py | 243 ++++++++++++++------- 1 file changed, 169 insertions(+), 74 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/ambari/blob/85c9104e/contrib/utils/perf/deploy-gce-perf-cluster.py ---------------------------------------------------------------------- diff --git a/contrib/utils/perf/deploy-gce-perf-cluster.py b/contrib/utils/perf/deploy-gce-perf-cluster.py index 95ed98f..4737c6f 100644 --- a/contrib/utils/perf/deploy-gce-perf-cluster.py +++ b/contrib/utils/perf/deploy-gce-perf-cluster.py @@ -148,6 +148,7 @@ class SCP: return {"exitstatus": scpstat.returncode, "log": log, "errormsg": errorMsg} + # main method to parse arguments from user and start work def main(): parser = argparse.ArgumentParser( @@ -178,6 +179,30 @@ def main(): args = parser.parse_args() do_work(args) + +def do_work(args): + """ + Check that all required args are passed in. If so, deploy the cluster. + :param args: Command line args + """ + if not args.controller: + raise Exception("GCE controller ip address is not defined!") + + if not args.key: + raise Exception("Path to gce ssh key is not defined!") + + if not args.cluster_suffix: + raise Exception("Cluster name suffix is not defined!") + + if not args.agent_prefix: + raise Exception("Agent name prefix is not defined!") + + if not args.agents_count: + raise Exception("Agents count for whole cluster is not defined (will put 50 Agents per VM)!") + + deploy_cluster(args) + + def deploy_cluster(args): """ Process cluster deployment @@ -186,49 +211,54 @@ def deploy_cluster(args): # When dividing, need to get the ceil. number_of_nodes = ((args.agents_count - 1) / NUMBER_OF_AGENTS_ON_HOST) + 1 - # trying to create cluster with needed params - print "Creating cluster {0}-{1} with {2} large nodes on centos6...".format(cluster_prefix, args.cluster_suffix, str(number_of_nodes)) - execute_command(args, args.controller, "/usr/sbin/gce up {0}-{1} {2} --centos6 --large".format(cluster_prefix, args.cluster_suffix, str(number_of_nodes)), - "Failed to create cluster, probably not enough resources!", "-tt") - - # VMs are not accessible immediately - time.sleep(10) + # In case of an error after creating VMs, can simply comment out this function to run again without creating VMs. + create_vms(args, number_of_nodes) # getting list of vms information like hostname and ip address print "Getting list of virtual machines from cluster..." # Dictionary from host name to IP - vms = get_vms_list(args) + (server_dict, agents_dict) = get_vms_list(args) # check number of nodes in cluster to be the same as user asked print "Checking count of created nodes in cluster..." - if not vms or len(vms) < number_of_nodes: - raise Exception("Cannot bring up enough nodes. Requested {0}, but got {1}. Probably not enough resources!".format(number_of_nodes, len(vms))) + if not agents_dict or len(agents_dict) < number_of_nodes: + raise Exception("Cannot bring up enough nodes. Requested {0}, but got {1}. Probably not enough resources!".format(number_of_nodes, len(agents_dict))) - print "GCE cluster was successfully created!" - pretty_print_vms(vms) + print "GCE cluster was successfully created!\n" # installing/starting ambari-server and ambari-agents on each host - server_host_name = sorted(vms.items())[0][0] - server_installed = False + server_item = server_dict.items()[0] + server_host_name = server_item[0] + server_ip = server_item[1] + print "==========================" + print "Server Hostname: %s" % server_host_name + print "Server IP: %s" % server_ip + print "==========================\n" + + # Sort the agents by hostname into a list. + sorted_agents = sort_hosts(agents_dict) + pretty_print_vms(sorted_agents) - print "Creating server.sh script (which will be executed on server to install/configure/start ambari-server and ambari-agent)..." - create_server_script(args, server_host_name) + print "Creating server.sh script (which will be executed on server to install/configure/start ambari-server)..." + create_server_script(server_host_name) print "Creating agent.sh script (which will be executed on agent hosts to install/configure/start ambari-agent..." - create_agent_script(args, server_host_name) + create_agent_script(server_host_name) time.sleep(10) + prepare_server(args, server_host_name, server_ip) + # If the user asks for a number of agents that is not a multiple of 50, then only create how many are needed instead # of 50 on every VM. num_agents_left_to_create = args.agents_count - start_num = 1 - for (hostname, ip) in sorted(vms.items()): + + for (hostname, ip) in sorted_agents: num_agents_on_this_host = min(num_agents_left_to_create, NUMBER_OF_AGENTS_ON_HOST) print "==========================" - print "Working on VM {0} that will contain hosts %d - %d".format(hostname, start_num, start_num + num_agents_on_this_host - 1) + print "Working on VM {0} that will contain hosts {1} - {2}".format(hostname, start_num, start_num + num_agents_on_this_host - 1) # The agent multiplier config will be different on each VM. @@ -236,90 +266,117 @@ def deploy_cluster(args): start_num += num_agents_on_this_host num_agents_left_to_create -= num_agents_on_this_host - if not server_installed: - remote_path = "/server.sh" - local_path = "server.sh" - print "Copying server.sh to {0}...".format(hostname) - put_file(args, ip, local_path, remote_path, "Failed to copy file!") - - print "Generating agent-multiplier.conf" - execute_command(args, ip, cmd_generate_multiplier_conf, "Failed to generate agent-multiplier.conf on host {0}".format(hostname)) - - print "Executing remote ssh command (set correct permissions and start executing server.sh in separate process) on {0}...".format(hostname) - execute_command(args, ip, "cd /; chmod 777 server.sh; nohup ./server.sh >/server.log 2>&1 &", - "Install/configure/start server script failed!") - server_installed = True - else: - remote_path = "/agent.sh" - local_path = "agent.sh" - print "Copying agent.sh to {0}...".format(hostname) - put_file(args, ip, local_path, remote_path, "Failed to copy file!") - - print "Generating agent-multiplier.conf" - execute_command(args, ip, cmd_generate_multiplier_conf, "Failed to generate agent-multiplier.conf on host {0}".format(hostname)) - - print "Executing remote ssh command (set correct permissions and start executing agent.sh in separate process) on {0}...".format(hostname) - execute_command(args, ip, "cd /; chmod 777 agent.sh; nohup ./agent.sh >/agent.log 2>&1 &", - "Install/configure start agent script failed!") + prepare_agent(args, hostname, ip, cmd_generate_multiplier_conf) + pass print "All scripts where successfully copied and started on all hosts. " \ "\nPay attention that server.sh script need 5 minutes to finish and agent.sh need 3 minutes!" -def do_work(args): +def create_vms(args, number_of_nodes): """ - Check that all required args are passed in. If so, deploy the cluster. + Request the server and VMs for the agents from GCE. :param args: Command line args + :param number_of_nodes: Number of VMs to request. """ - if not args.controller: - raise Exception("GCE controller ip address is not defined!") + print "Creating server VM {0}-server-{1} with xxlarge nodes on centos6...".format(cluster_prefix, args.cluster_suffix) + execute_command(args, args.controller, "/usr/sbin/gce up {0}-server-{1} 1 --centos6 --xxlarge".format(cluster_prefix, args.cluster_suffix), + "Failed to create server, probably not enough resources!", "-tt") + time.sleep(10) - if not args.key: - raise Exception("Path to gce ssh key is not defined!") + # trying to create cluster with needed params + print "Creating agent VMs {0}-agent-{1} with {2} large nodes on centos6...".format(cluster_prefix, args.cluster_suffix, str(number_of_nodes)) + execute_command(args, args.controller, "/usr/sbin/gce up {0}-agent-{1} {2} --centos6 --large".format(cluster_prefix, args.cluster_suffix, str(number_of_nodes)), + "Failed to create cluster VMs, probably not enough resources!", "-tt") - if not args.cluster_suffix: - raise Exception("Cluster name suffix is not defined!") + # VMs are not accessible immediately + time.sleep(10) - if not args.agent_prefix: - raise Exception("Agent name prefix is not defined!") - if not args.agents_count: - raise Exception("Agents count for whole cluster is not defined (will put 50 Agents per VM)!") +def prepare_server(args, hostname, ip): + remote_path = "/server.sh" + local_path = "server.sh" + print "Copying server.sh to {0}...".format(hostname) + put_file(args, ip, local_path, remote_path, "Failed to copy file!") - deploy_cluster(args) + print "Executing remote ssh command (set correct permissions and start executing server.sh in separate process) on {0}...".format(hostname) + execute_command(args, ip, "cd /; chmod 777 server.sh; nohup ./server.sh >/server.log 2>&1 &", + "Install/configure/start server script failed!") -def create_server_script(args, server_host_name): +def prepare_agent(args, hostname, ip, cmd_generate_multiplier_conf): + remote_path = "/agent.sh" + local_path = "agent.sh" + print "Copying agent.sh to {0}...".format(hostname) + put_file(args, ip, local_path, remote_path, "Failed to copy file!") + + print "Generating agent-multiplier.conf" + execute_command(args, ip, cmd_generate_multiplier_conf, "Failed to generate agent-multiplier.conf on host {0}".format(hostname)) + + print "Executing remote ssh command (set correct permissions and start executing agent.sh in separate process) on {0}...".format(hostname) + execute_command(args, ip, "cd /; chmod 777 agent.sh; nohup ./agent.sh >/agent.log 2>&1 &", + "Install/configure start agent script failed!") + + +def create_server_script(server_host_name): """ Creating server.sh script in the same dir where current script is located server.sh script will install, configure and start ambari-server and ambari-agent on host - :param args: Command line args :param server_host_name: Server host name """ + # ambari-server setup <options> may not work property, so doing several calls like + # echo "arg=value" >> .../ambari.properties + contents = "#!/bin/bash\n" + \ "wget -O /etc/yum.repos.d/ambari.repo {0}\n".format(ambari_repo_file_url) + \ - "yum clean all; yum install git ambari-server ambari-agent -y\n" + \ - "cd /home; git clone https://github.com/apache/ambari.git\n" + \ + "yum clean all; yum install git ambari-server -y\n" + \ + "mkdir /home ; cd /home ; git clone https://github.com/apache/ambari.git\n" + \ "cp -r /home/ambari/ambari-server/src/main/resources/stacks/PERF /var/lib/ambari-server/resources/stacks/PERF\n" + \ "cp -r /home/ambari/ambari-server/src/main/resources/stacks/PERF /var/lib/ambari-agent/cache/stacks/PERF\n" + \ + "\n" + \ + "\n" + \ + "yum install mysql-connector-java* -y\n" + \ + "yum install mysql-server -y\n" + \ + "service mysqld start\n" + \ + "mysql -uroot -e \"CREATE DATABASE ambari;\"\n" + \ + "mysql -uroot -e \"SOURCE /var/lib/ambari-server/resources/Ambari-DDL-MySQL-CREATE.sql;\" ambari\n" + \ + "mysql -uroot -e \"CREATE USER 'ambari'@'%' IDENTIFIED BY 'bigdata';\"\n" + \ + "mysql -uroot -e \"GRANT ALL PRIVILEGES ON *.* TO 'ambari'@'%%';\"\n" + \ + "mysql -uroot -e \"CREATE USER 'ambari'@'localhost' IDENTIFIED BY 'bigdata';\"\n" + \ + "mysql -uroot -e \"GRANT ALL PRIVILEGES ON *.* TO 'ambari'@'localhost';\"\n" + \ + "mysql -uroot -e \"CREATE USER 'ambari'@'{0}' IDENTIFIED BY 'bigdata';\"\n".format(server_host_name) + \ + "mysql -uroot -e \"GRANT ALL PRIVILEGES ON *.* TO 'ambari'@'{0}';\"\n".format(server_host_name) + \ + "mysql -uroot -e \"FLUSH PRIVILEGES;\"\n" + \ + "\n" + \ + "\n" + \ "ambari-server setup -s\n" + \ + "ambari-server setup --database mysql --jdbc-db=mysql --jdbc-driver=/usr/share/java/mysql-connector-java.jar --databasehost=localhost --databaseport=3306 --databasename=ambari --databaseusername=ambari --databasepassword=bigdata\n" + \ + "sed -i -e 's/=postgres/=mysql/g' /etc/ambari-server/conf/ambari.properties\n" + \ + "sed -i -e 's/server.persistence.type=local/server.persistence.type=remote/g' /etc/ambari-server/conf/ambari.properties\n" + \ + "sed -i -e 's/local.database.user=postgres//g' /etc/ambari-server/conf/ambari.properties\n" + \ + "sed -i -e 's/server.jdbc.postgres.schema=ambari//g' /etc/ambari-server/conf/ambari.properties\n" + \ "sed -i -e 's/false/true/g' /var/lib/ambari-server/resources/stacks/PERF/1.0/metainfo.xml\n" + \ + "\n" + \ + "echo 'server.jdbc.driver=com.mysql.jdbc.Driver' >> /etc/ambari-server/conf/ambari.properties\n" + \ + "echo 'server.jdbc.rca.url=jdbc:mysql://{0}:3306/ambari' >> /etc/ambari-server/conf/ambari.properties\n".format(server_host_name) + \ + "echo 'server.jdbc.rca.driver=com.mysql.jdbc.Driver' >> /etc/ambari-server/conf/ambari.properties\n" + \ + "echo 'server.jdbc.url=jdbc:mysql://{0}:3306/ambari' >> /etc/ambari-server/conf/ambari.properties\n".format(server_host_name) + \ + "echo 'server.jdbc.port=3306' >> /etc/ambari-server/conf/ambari.properties\n" + \ + "echo 'server.jdbc.hostname=localhost' >> /etc/ambari-server/conf/ambari.properties\n" + \ + "echo 'server.jdbc.driver.path=/usr/share/java/mysql-connector-java.jar' >> /etc/ambari-server/conf/ambari.properties\n" + \ + "\n" + \ "ambari-server start --skip-database-check\n" + \ - "sed -i -e 's/hostname=localhost/hostname={0}/g' /etc/ambari-agent/conf/ambari-agent.ini\n".format(server_host_name) + \ - "sed -i -e 's/agent]/agent]\\nhostname_script={0}\\npublic_hostname_script={1}\\n/1' /etc/ambari-agent/conf/ambari-agent.ini\n".format(hostname_script, public_hostname_script) + \ - "python /home/ambari/ambari-agent/conf/unix/agent-multiplier.py start\n" + \ "exit 0" with open("server.sh", "w") as f: f.write(contents) -def create_agent_script(args, server_host_name): +def create_agent_script(server_host_name): """ Creating agent.sh script in the same dir where current script is located agent.sh script will install, configure and start ambari-agent on host - :param args: Command line args :param server_host_name: Server host name """ @@ -327,7 +384,7 @@ def create_agent_script(args, server_host_name): contents = "#!/bin/bash\n" + \ "wget -O /etc/yum.repos.d/ambari.repo {0}\n".format(ambari_repo_file_url) + \ "yum clean all; yum install git ambari-agent -y\n" + \ - "cd /home; git clone https://github.com/apache/ambari.git\n" + \ + "mkdir /home ; cd /home; git clone https://github.com/apache/ambari.git\n" + \ "cp -r /home/ambari/ambari-server/src/main/resources/stacks/PERF /var/lib/ambari-agent/cache/stacks/PERF\n" + \ "sed -i -e 's/hostname=localhost/hostname={0}/g' /etc/ambari-agent/conf/ambari-agent.ini\n".format(server_host_name) + \ "sed -i -e 's/agent]/agent]\\nhostname_script={0}\\npublic_hostname_script={1}\\n/1' /etc/ambari-agent/conf/ambari-agent.ini\n".format(hostname_script, public_hostname_script) + \ @@ -381,12 +438,27 @@ def put_file(args, ip, local_file, remote_file, fail_message, login='root'): def get_vms_list(args): """ + Get tuple of (x, y) where + x = dictionary from single server host name to ip + y = dictionary from multiple agent host names to ip + :param args: Command line arguments + :return: Tuple of dictionaries of hostnames and ip for server and agents. + """ + # Get the server. + server = __get_vms_list_from_name(args, "{0}-server-{1}".format(cluster_prefix, args.cluster_suffix)) + + # Get the agents + agents = __get_vms_list_from_name(args, "{0}-agent-{1}".format(cluster_prefix, args.cluster_suffix)) + + return (server, agents) + +def __get_vms_list_from_name(args, cluster_name): + """ Method to parse "gce fqdn {cluster-name}" command output and get hosts and ips pairs for every host in cluster :param args: Command line args :return: Mapping of VM host name to ip. """ - - gce_fqdb_cmd = '/usr/sbin/gce fqdn {0}-{1}'.format(cluster_prefix, args.cluster_suffix) + gce_fqdb_cmd = '/usr/sbin/gce fqdn {0}'.format(cluster_name) out = execute_command(args, args.controller, gce_fqdb_cmd, "Failed to get VMs list!", "-tt") lines = out.split('\n') #print "LINES=" + str(lines) @@ -405,13 +477,36 @@ def get_vms_list(args): raise Exception('Cannot parse "{0}"'.format(lines)) +def sort_hosts(hosts): + """ + Sort the hosts by name and take into account the numbers. + :param hosts: Dictionary from host name (e.g., perf-9-test, perf-62-test), to the IP + :return: Sorted list of tuples + """ + host_names = hosts.keys() + sorted_host_tuples = [(None, None),] * len(hosts) + + pattern = re.compile(".*?-agent-.*?(\d+)") + for host_name in host_names: + m = pattern.match(host_name) + if m and len(m.groups()) == 1: + number = int(m.group(1)) + ip = hosts[host_name] + sorted_host_tuples[number - 1] = (host_name, ip) + + return sorted_host_tuples + + def pretty_print_vms(vms): - print "----------------------------" - print "Server IP: {0}".format(sorted(vms.items())[0][1]) + """ + Pretty print the VMs hostnames + :param vms: List of tuples (hostname, ip) + """ + print "==========================" print "Hostnames of nodes in cluster:" - for (hostname, ip) in sorted(vms.items()): + for (hostname, ip) in vms: print hostname - print "----------------------------" + print "==========================\n" if __name__ == "__main__":