This is an automated email from the ASF dual-hosted git repository.
lfrolov pushed a commit to branch DATALAB-1408 in repository https://gitbox.apache.org/repos/asf/incubator-datalab.git
commit bd5f36720bed6ac7fcca3097eac68e8f3d493066 Author: leonidfrolov <[email protected]> AuthorDate: Wed Sep 7 18:12:30 2022 +0300 [DATALAB-1408]: fixed minor bugs, changed zookeeper shape to A2 --- .../scripts/azure/dataengine-service_configure.py | 90 +++++++++++++++------- .../scripts/azure/dataengine-service_create.py | 16 ++-- .../scripts/azure/dataengine-service_terminate.py | 2 +- .../src/general/scripts/azure/project_terminate.py | 3 +- 4 files changed, 75 insertions(+), 36 deletions(-) diff --git a/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_configure.py b/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_configure.py index 9973c3d5c..ed0a9ab85 100644 --- a/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_configure.py +++ b/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_configure.py @@ -21,40 +21,78 @@ # # ****************************************************************************** +import datalab.actions_lib +import datalab.fab +import datalab.meta_lib +import json +from datalab.logger import logging +import multiprocessing +import os +import sys +import traceback +import subprocess +from Crypto.PublicKey import RSA +from fabric import * + if __name__ == "__main__": try: - data_engine['service_base_name'] = os.environ['conf_service_base_name'] - data_engine['resource_group_name'] = os.environ['azure_resource_group_name'] - data_engine['region'] = os.environ['azure_region'] - data_engine['key_name'] = os.environ['conf_key_name'] - data_engine['vpc_name'] = os.environ['azure_vpc_name'] - data_engine['user_name'] = os.environ['edge_user_name'] - data_engine['project_name'] = os.environ['project_name'] - data_engine['project_tag'] = data_engine['project_name'] - data_engine['endpoint_name'] = os.environ['endpoint_name'] - data_engine['endpoint_tag'] = data_engine['endpoint_name'] - data_engine['master_node_name'] = '{}-m'.format(data_engine['cluster_name']) 
- data_engine['key_name'] = os.environ['conf_key_name'] + AzureMeta = datalab.meta_lib.AzureMeta() + AzureActions = datalab.actions_lib.AzureActions() + logging.info('Generating infrastructure names and tags') + hdinsight_conf = dict() + hdinsight_conf['service_base_name'] = os.environ['conf_service_base_name'] + hdinsight_conf['resource_group_name'] = os.environ['azure_resource_group_name'] + hdinsight_conf['region'] = os.environ['azure_region'] + hdinsight_conf['key_name'] = os.environ['conf_key_name'] + hdinsight_conf['vpc_name'] = os.environ['azure_vpc_name'] + hdinsight_conf['user_name'] = os.environ['edge_user_name'] + hdinsight_conf['project_name'] = os.environ['project_name'] + hdinsight_conf['project_tag'] = hdinsight_conf['project_name'] + hdinsight_conf['endpoint_name'] = os.environ['endpoint_name'] + hdinsight_conf['endpoint_tag'] = hdinsight_conf['endpoint_name'] + hdinsight_conf['key_name'] = os.environ['conf_key_name'] + hdinsight_conf['hdinsight_master_instance_type'] = os.environ['hdinsight_master_instance_type'] + hdinsight_conf['hdinsight_slave_instance_type'] = os.environ['hdinsight_slave_instance_type'] if 'computational_name' in os.environ: - data_engine['computational_name'] = os.environ['computational_name'] + hdinsight_conf['computational_name'] = os.environ['computational_name'] else: - data_engine['computational_name'] = '' - data_engine['cluster_name'] = '{}-{}-{}-des-{}'.format(data_engine['service_base_name'], - data_engine['project_name'], - data_engine['endpoint_name'], - data_engine['computational_name']) + hdinsight_conf['computational_name'] = '' + hdinsight_conf['cluster_name'] = '{}-{}-{}-des-{}'.format(hdinsight_conf['service_base_name'], + hdinsight_conf['project_name'], + hdinsight_conf['endpoint_name'], + hdinsight_conf['computational_name']) + hdinsight_conf['cluster_url'] = 'https://{}.azurehdinsight.net'.format(hdinsight_conf['cluster_name']) + hdinsight_conf['cluster_jupyter_url'] = 
'{}/jupyter/'.format(hdinsight_conf['cluster_url']) + hdinsight_conf['cluster_sparkhistory_url'] = '{}/sparkhistory/'.format(hdinsight_conf['cluster_url']) + hdinsight_conf['cluster_zeppelin_url'] = '{}/zeppelin/'.format(hdinsight_conf['cluster_url']) + logging.info('[SUMMARY]') + logging.info("Service base name: {}".format(hdinsight_conf['service_base_name'])) + logging.info("Region: {}".format(hdinsight_conf['region'])) + logging.info("Cluster name: {}".format(hdinsight_conf['cluster_name'])) + logging.info("Master node shape: {}".format(hdinsight_conf['hdinsight_master_instance_type'])) + logging.info("Slave node shape: {}".format(hdinsight_conf['hdinsight_slave_instance_type'])) + logging.info("Instance count: {}".format(str(os.environ['hdinsight_count']))) + logging.info("URL access username: datalab-user") + logging.info("URL access password: {}".format(os.environ['access_password'])) + with open("/root/result.json", 'w') as result: - res = {"hostname": data_engine['cluster_name'], - "instance_id": data_engine['master_node_name'], - "key_name": data_engine['key_name'], + res = {"hostname": hdinsight_conf['cluster_name'], + "key_name": hdinsight_conf['key_name'], "Action": "Create new HDInsight cluster", "computational_url": [ {"description": "HDInsight cluster", - "url": "spark_master_access_url"} - # {"description": "Apache Spark Master (via tunnel)", - # "url": spark_master_url} + "url": hdinsight_conf['cluster_url']}, + {"description": "Apache Spark History", + "url": hdinsight_conf['cluster_sparkhistory_url']}, + {"description": "Jupyter notebook", + "url": hdinsight_conf['cluster_jupyter_url']}, + {"description": "Zeppelin notebook", + "url": hdinsight_conf['cluster_zeppelin_url']} ] } result.write(json.dumps(res)) - except: - pass \ No newline at end of file + except Exception as err: + traceback.print_exc() + datalab.fab.append_result("Error with writing results", str(err)) + 
AzureActions.terminate_hdinsight_cluster(hdinsight_conf['resource_group_name'], hdinsight_conf['cluster_name']) + sys.exit(1) diff --git a/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_create.py b/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_create.py index fd0aca6a0..a754e99e2 100644 --- a/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_create.py +++ b/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_create.py @@ -25,6 +25,7 @@ import argparse import json import sys import secrets +import os from datalab.actions_lib import * from datalab.meta_lib import * from datalab.logger import logging @@ -123,7 +124,7 @@ def create_cluster_parameters(location, tags, cluster_version, cluster_login_use Role( name="zookeepernode", target_instance_count=3, - hardware_profile=HardwareProfile(vm_size="Standard_A4_v2"), + hardware_profile=HardwareProfile(vm_size="Standard_A2_v2"), os_profile=OsProfile( linux_operating_system_profile=LinuxOperatingSystemProfile( username=cluster_login_username, @@ -161,6 +162,7 @@ def create_cluster_parameters(location, tags, cluster_version, cluster_login_use if __name__ == "__main__": #parser.print_help() password = secrets.token_urlsafe(20) + os.environ['access_password'] = password params = create_cluster_parameters(args.location, json.loads(args.tags), args.cluster_version, 'datalab-user', password, args.master_instance_type, args.worker_count, args.worker_instance_type, args.storage_account_name, args.storage_account_key, @@ -168,9 +170,9 @@ if __name__ == "__main__": build_hdinsight_cluster(args.resource_group_name, args.cluster_name, params) - logfile = '{}_creation.log'.format(args.cluster_name) - logpath = '/response/' + logfile - out = open(logpath, 'w') - out.close() - - sys.exit(0) + # logfile = '{}_creation.log'.format(args.cluster_name) + # logpath = '/response/' + logfile + # out = open(logpath, 'w') + # out.close() + # + # 
sys.exit(0) diff --git a/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_terminate.py b/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_terminate.py index 4a168cf9a..1c7701644 100644 --- a/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_terminate.py +++ b/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_terminate.py @@ -67,7 +67,7 @@ if __name__ == "__main__": logging.info('[TERMINATE HDINSIGHT CLUSTER AND ASSOCIATED RESOURCES]') try: cluster = AzureMeta.get_hdinsight_cluster(hdinsight_conf['resource_group_name'], hdinsight_conf['cluster_name']) - if cluster.properties.cluster_state == 'Running': + if cluster and cluster.properties.cluster_state == 'Running': AzureActions.terminate_hdinsight_cluster(hdinsight_conf['resource_group_name'], hdinsight_conf['cluster_name']) for storage_account in AzureMeta.list_storage_accounts(hdinsight_conf['resource_group_name']): diff --git a/infrastructure-provisioning/src/general/scripts/azure/project_terminate.py b/infrastructure-provisioning/src/general/scripts/azure/project_terminate.py index bd400ac43..092310b62 100644 --- a/infrastructure-provisioning/src/general/scripts/azure/project_terminate.py +++ b/infrastructure-provisioning/src/general/scripts/azure/project_terminate.py @@ -40,8 +40,7 @@ def terminate_edge_node(resource_group_name, service_base_name, project_tag, sub for cluster in clusters_list: if "sbn" in cluster.tags and service_base_name == cluster.tags["sbn"] and \ "project" in cluster.tags and cluster.tags['project'] == project_tag: - print(cluster.name + ' found for termination') - #AzureActions.terminate_hdinsight_cluster(cluster.name, region) + AzureActions.terminate_hdinsight_cluster(resource_group_name, cluster.name) logging.info('The HDinsight cluster {} has been terminated successfully'.format(cluster.name)) else: logging.info("There are no HDinsight clusters to terminate.") 
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]
