This is an automated email from the ASF dual-hosted git repository.
lfrolov pushed a commit to branch DATALAB-1408
in repository https://gitbox.apache.org/repos/asf/incubator-datalab.git
The following commit(s) were added to refs/heads/DATALAB-1408 by this push:
new dbec8bea7 [DATALAB-1408]: hdinsight creation code update
dbec8bea7 is described below
commit dbec8bea79c000d9fb8884f567b010217f0ee67f
Author: leonidfrolov <[email protected]>
AuthorDate: Thu Sep 1 15:29:26 2022 +0300
[DATALAB-1408]: hdinsight creation code update
---
.../src/general/lib/azure/actions_lib.py | 57 ++++----
.../scripts/azure/dataengine-service_create.py | 143 +++++++++++++++++++
.../scripts/azure/dataengine-service_prepare.py | 156 ++++-----------------
3 files changed, 198 insertions(+), 158 deletions(-)
diff --git a/infrastructure-provisioning/src/general/lib/azure/actions_lib.py
b/infrastructure-provisioning/src/general/lib/azure/actions_lib.py
index cdd3dc8f9..563095bf4 100644
--- a/infrastructure-provisioning/src/general/lib/azure/actions_lib.py
+++ b/infrastructure-provisioning/src/general/lib/azure/actions_lib.py
@@ -1172,6 +1172,31 @@ class AzureActions:
file=sys.stdout)}))
traceback.print_exc(file=sys.stdout)
+ def create_hdinsight_cluster(self, resource_group_name, cluster_name,
cluster_parameters):
+ try:
+ print('Starting to create HDInsight Spark cluster
{}'.format(cluster_name))
+ return
self.hdinsight_client.clusters.begin_create(resource_group_name, cluster_name,
cluster_parameters)
+ except Exception as err:
+ logging.info(
+ "Unable to create HDInsight Spark cluster: " + str(err) + "\n
Traceback: " + traceback.print_exc(file=sys.stdout))
+ append_result(str({"error": "Unable to create HDInsight Spark
cluster",
+ "error_message": str(err) + "\n Traceback: " +
traceback.print_exc(
+ file=sys.stdout)}))
+ traceback.print_exc(file=sys.stdout)
+
+
+ def terminate_hdinsight_cluster(self, resource_group_name, cluster_name,
cluster_parameters):
+ try:
+ print('Starting to terminate HDInsight Spark cluster
{}'.format(cluster_name))
+ return
self.hdinsight_client.clusters.begin_delete(resource_group_name, cluster_name,
cluster_parameters)
+ except Exception as err:
+ logging.info(
+ "Unable to terminate HDInsight Spark cluster: " + str(err) +
"\n Traceback: " + traceback.print_exc(file=sys.stdout))
+ append_result(str({"error": "Unable to terminate HDInsight Spark
cluster",
+ "error_message": str(err) + "\n Traceback: " +
traceback.print_exc(
+ file=sys.stdout)}))
+ traceback.print_exc(file=sys.stdout)
+
def ensure_local_jars(os_user, jars_dir):
if not
exists(datalab.fab.conn,'/home/{}/.ensure_dir/local_jars_ensured'.format(os_user)):
@@ -1500,35 +1525,3 @@ def find_des_jars(all_jars, des_path):
except Exception as err:
print('Error:', str(err))
sys.exit(1)
-
-def create_hdinsight_cluster(resource_group_name, instance_name,
cluster_parameters):
- try:
- azure_action = AzureActions()
- client_1 = azure_action.hdinsight_client
- print('Starting to create HDInsight Spark cluster
{}'.format('hdinsight'))
- result = client_1.clusters.begin_create(resource_group_name,
instance_name, cluster_parameters)
-
- return result
- except Exception as err:
- logging.info(
- "Unable to create HDInsight Spark cluster: " + str(err) + "\n
Traceback: " + traceback.print_exc(file=sys.stdout))
- append_result(str({"error": "Unable to create HDInsight Spark cluster",
- "error_message": str(err) + "\n Traceback: " +
traceback.print_exc(
- file=sys.stdout)}))
- traceback.print_exc(file=sys.stdout)
-
-
-def terminate_hdinsight_cluster(resource_group_name, instance_name,
cluster_parameters):
- try:
- azure_action = AzureActions()
- client_1 = azure_action.hdinsight_client
- print('Starting to terminate HDInsight Spark cluster
{}'.format('hdinsight'))
- client_1.clusters.begin_delete(resource_group_name, instance_name,
cluster_parameters)
-
- except Exception as err:
- logging.info(
- "Unable to terminate HDInsight Spark cluster: " + str(err) + "\n
Traceback: " + traceback.print_exc(file=sys.stdout))
- append_result(str({"error": "Unable to terminate HDInsight Spark
cluster",
- "error_message": str(err) + "\n Traceback: " +
traceback.print_exc(
- file=sys.stdout)}))
- traceback.print_exc(file=sys.stdout)
\ No newline at end of file
diff --git
a/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_create.py
b/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_create.py
index e69de29bb..25ebfb34a 100644
---
a/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_create.py
+++
b/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_create.py
@@ -0,0 +1,143 @@
+#!/usr/bin/python3
+
+# *****************************************************************************
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#
******************************************************************************
+
+import argparse
+import json
+import sys
+from datalab.actions_lib import *
+from datalab.meta_lib import *
+from datalab.logger import logging
+from fabric import *
+from azure.mgmt.hdinsight.models import *
+from azure.mgmt.core import *
+from azure.common import *
+from azure.core import *
+
# Command-line interface for standalone invocation by dataengine-service_prepare.py.
parser = argparse.ArgumentParser()
parser.add_argument('--resource_group_name', type=str, help='')
parser.add_argument('--cluster_name', type=str, help='')
parser.add_argument('--cluster_version', type=str, help='')
parser.add_argument('--location', type=str, help='')
parser.add_argument('--master_instance_type', type=str, help='')
parser.add_argument('--worker_instance_type', type=str, help='')
parser.add_argument('--worker_count', type=str, help='')
parser.add_argument('--storage_account_name', type=str, help='')
parser.add_argument('--storage_account_key', type=str, help='')
parser.add_argument('--container_name', type=str, help='')
parser.add_argument('--tags', type=str, help='')
# Gateway/admin password: the main block reads args.password, which raised
# AttributeError because this argument was never declared.
parser.add_argument('--password', type=str, help='')
parser.add_argument('--public_key', type=str, help='')
args = parser.parse_args()
+
def build_hdinsight_cluster(resource_group_name, cluster_name, params):
    """Kick off HDInsight cluster creation through AzureActions.

    :param resource_group_name: Azure resource group for the cluster.
    :param cluster_name: name of the cluster to create.
    :param params: ClusterCreateParametersExtended built by create_cluster_parameters.
    :return: whatever AzureActions.create_hdinsight_cluster returns (an LRO poller).
    """
    logging.info("Will be created cluster: {}".format(cluster_name))
    # This module only star-imports from datalab submodules, so the name 'datalab'
    # itself is never bound; qualifying the call as datalab.actions_lib.AzureActions()
    # raised NameError. AzureActions is in scope via 'from datalab.actions_lib import *'.
    return AzureActions().create_hdinsight_cluster(resource_group_name, cluster_name, params)
+
def create_cluster_parameters(location, tags, cluster_version, cluster_login_username, password, master_instance_type,
                              worker_count, worker_instance_type, storage_account_name, storage_account_key,
                              container_name, public_key):
    """Assemble the ClusterCreateParametersExtended payload for a Spark HDInsight cluster.

    Builds a two-role (headnode/workernode) Linux cluster definition with SSH
    public-key access and a default blob-storage account.
    """
    def ssh_os_profile():
        # Fresh OsProfile per role, matching SSH public-key login for both roles.
        return OsProfile(
            linux_operating_system_profile=LinuxOperatingSystemProfile(
                username=cluster_login_username,
                ssh_profile={
                    "publicKeys": [
                        {"certificateData": public_key}
                    ]
                }
            )
        )

    gateway_configurations = {
        "gateway": {
            "restAuthCredential.isEnabled": "true",
            "restAuthCredential.username": cluster_login_username,
            "restAuthCredential.password": password
        }
    }

    head_role = Role(
        name="headnode",
        target_instance_count=2,
        hardware_profile=HardwareProfile(vm_size=master_instance_type),
        os_profile=ssh_os_profile()
    )
    worker_role = Role(
        name="workernode",
        target_instance_count=int(worker_count),
        hardware_profile=HardwareProfile(vm_size=worker_instance_type),
        os_profile=ssh_os_profile()
    )

    default_storage = StorageAccount(
        name=storage_account_name + ".blob.core.windows.net",
        key=storage_account_key,
        container=container_name.lower(),
        is_default=True
    )

    return ClusterCreateParametersExtended(
        location=location,
        tags=tags,
        properties=ClusterCreateProperties(
            cluster_version=cluster_version,
            os_type=OSType.linux,
            tier=Tier.standard,
            cluster_definition=ClusterDefinition(
                kind="Spark",
                configurations=gateway_configurations
            ),
            compute_profile=ComputeProfile(roles=[head_role, worker_role]),
            storage_profile=StorageProfile(storageaccounts=[default_storage])
        )
    )
+
+##############
+# Run script #
+##############
+
##############
# Run script #
##############

if __name__ == "__main__":
    parser.print_help()

    # NOTE(review): args.password requires a --password argument on the parser
    # above — confirm it is declared, otherwise this raises AttributeError.
    params = create_cluster_parameters(args.location, json.loads(args.tags), args.cluster_version, 'datalab-user',
                                       args.password, args.master_instance_type, args.worker_count,
                                       args.worker_instance_type, args.storage_account_name,
                                       args.storage_account_key, args.container_name, args.public_key)
    build_hdinsight_cluster(args.resource_group_name, args.cluster_name, params)

    # Touch the response log file that the provisioning engine polls for; only
    # its existence matters, so nothing is written. A context manager replaces
    # the bare open()/close(), which leaked the handle if close() was not reached.
    logfile = '{}_creation.log'.format(args.cluster_name)
    logpath = '/response/' + logfile
    with open(logpath, 'w'):
        pass

    sys.exit(0)
diff --git
a/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_prepare.py
b/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_prepare.py
index 7bd369910..e3724532d 100644
---
a/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_prepare.py
+++
b/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_prepare.py
@@ -34,102 +34,15 @@ import subprocess
import Crypto.PublicKey
import fabric
import azure.mgmt.hdinsight.models
-#from Crypto.PublicKey import RSA
-#from fabric import *
from azure.mgmt.hdinsight.models import *
from azure.mgmt.core import *
from azure.common import *
from azure.core import *
from datalab.actions_lib import *
-
-CLUSTER_NAME = 'hdinsight-test'
-# The name of your existing Resource Group
-RESOURCE_GROUP_NAME = 'dlab-resource-group'
-# Choose a region. i.e. "East US 2".
-LOCATION = 'West US 2'
-# Cluster login username
-CLUSTER_LOGIN_USER_NAME = 'datalab-user'
-# (SSH) user username
-SSH_USER_NAME = 'datalab-user'
-# Cluster admin password
-PASSWORD = ''
-# The name of blob storage account
-STORAGE_ACCOUNT_NAME = 'hdinsight'
-# Blob storage account key
-STORAGE_ACCOUNT_KEY = ''
-# Blob storage account container name
-CONTAINER_NAME = 'hdinsight'
-# Blob Storage endpoint suffix.
-BLOB_ENDPOINT_SUFFIX = '.blob.core.windows.net'
-
-def create_cluster_parameters(LOCATION, CLUSTER_LOGIN_USER_NAME, PASSWORD,
SSH_USER_NAME):
-
- # Returns cluster parameters
-
- return ClusterCreateParametersExtended(
- location=LOCATION,
- tags={},
- properties=ClusterCreateProperties(
- cluster_version="4.0",
- os_type=OSType.linux,
- tier=Tier.standard,
- cluster_definition=ClusterDefinition(
- kind="Spark",
- configurations={
- "gateway": {
- "restAuthCredential.isEnabled": "true",
- "restAuthCredential.username": CLUSTER_LOGIN_USER_NAME,
- "restAuthCredential.password": PASSWORD
- }
- }
- ),
- compute_profile=ComputeProfile(
- roles=[
- Role(
- name="headnode",
- target_instance_count=2,
-
hardware_profile=HardwareProfile(vm_size="Standard_A4_v2"),
- os_profile=OsProfile(
-
linux_operating_system_profile=LinuxOperatingSystemProfile(
- username=SSH_USER_NAME,
- password=PASSWORD
- )
- )
- ),
- Role(
- name="workernode",
- target_instance_count=2,
-
hardware_profile=HardwareProfile(vm_size="Standard_A4_v2"),
- os_profile=OsProfile(
-
linux_operating_system_profile=LinuxOperatingSystemProfile(
- username=SSH_USER_NAME,
- password=PASSWORD
- )
- )
- )
- ]
- ),
- storage_profile=StorageProfile(
- storageaccounts=[
- StorageAccount(
- name=STORAGE_ACCOUNT_NAME + ".blob.core.windows.net",
- key=STORAGE_ACCOUNT_KEY,
- container=CONTAINER_NAME.lower(),
- is_default=True
- )
- ]
- )
- )
- )
-
if __name__ == "__main__":
- #params = create_cluster_parameters()
- #create_hdinsight_cluster(RESOURCE_GROUP_NAME,CLUSTER_NAME, params)
-
try:
AzureMeta = datalab.meta_lib.AzureMeta()
- AzureActions = datalab.actions_lib.AzureActions()
logging.info('Generating infrastructure names and tags')
hdinsight_conf = dict()
if 'exploratory_name' in os.environ:
@@ -142,57 +55,48 @@ if __name__ == "__main__":
hdinsight_conf['computational_name'] = ''
hdinsight_conf['service_base_name'] =
(os.environ['conf_service_base_name'])
- hdinsight_conf['edge_user_name'] = (os.environ['edge_user_name'])
hdinsight_conf['project_name'] =
(os.environ['project_name']).replace('_', '-').lower()
- hdinsight_conf['project_tag'] = hdinsight_conf['project_name']
hdinsight_conf['endpoint_name'] =
(os.environ['endpoint_name']).replace('_', '-').lower()
- hdinsight_conf['endpoint_tag'] = hdinsight_conf['endpoint_name']
hdinsight_conf['key_name'] = os.environ['conf_key_name']
hdinsight_conf['key_path'] =
'{0}{1}.pem'.format(os.environ['conf_key_dir'], os.environ['conf_key_name'])
- hdinsight_conf['zone'] = os.environ['gcp_zone']
hdinsight_conf['resource_group_name'] =
os.environ['azure_resource_group_name']
hdinsight_conf['region'] = os.environ['azure_region']
- data_engine['vpc_name'] = os.environ['azure_vpc_name']
- data_engine['private_subnet_name'] =
'{}-{}-{}-subnet'.format(data_engine['service_base_name'],
-
data_engine['project_name'],
-
data_engine['endpoint_name'])
- data_engine['private_subnet_cidr'] =
AzureMeta.get_subnet(data_engine['resource_group_name'],
-
data_engine['vpc_name'],
-
data_engine['private_subnet_name']).address_prefix
- data_engine['cluster_name'] =
'{}-{}-{}-des-{}'.format(data_engine['service_base_name'],
-
data_engine['project_name'],
-
data_engine['endpoint_name'],
-
data_engine['computational_name'])
-
-
-
- hdinsight_conf['subnet'] =
'{0}-{1}-{2}-subnet'.format(hdinsight_conf['service_base_name'],
-
hdinsight_conf['project_name'],
-
hdinsight_conf['endpoint_name'])
- hdinsight_conf['cluster_name'] =
'{0}-{1}-{2}-des-{3}'.format(hdinsight_conf['service_base_name'],
-
hdinsight_conf['project_name'],
-
hdinsight_conf['endpoint_name'],
-
hdinsight_conf['computational_name'])
- hdinsight_conf['cluster_tag'] =
'{0}-{1}-{2}-ps'.format(hdinsight_conf['service_base_name'],
-
hdinsight_conf['project_name'],
-
hdinsight_conf['endpoint_name'])
- hdinsight_conf['bucket_name'] =
'{0}-{1}-{2}-bucket'.format(hdinsight_conf['service_base_name'],
-
hdinsight_conf['project_name'],
-
hdinsight_conf['endpoint_name'])
-
- hdinsight_conf['edge_instance_hostname'] =
'{0}-{1}-{2}-edge'.format(hdinsight_conf['service_base_name'],
-
hdinsight_conf['project_name'],
-
hdinsight_conf['endpoint_name'])
- hdinsight_conf['datalab_ssh_user'] = os.environ['conf_os_user']
+ hdinsight_conf['cluster_name'] =
'{}-{}-{}-des-{}'.format(hdinsight_conf['service_base_name'],
+
hdinsight_conf['project_name'],
+
hdinsight_conf['endpoint_name'],
+
hdinsight_conf['computational_name'])
+
+ hdinsight_conf['cluster_tags'] = {
+ "name": hdinsight_conf['cluster_name'],
+ "sbn": hdinsight_conf['service_base_name'],
+ "notebook_name": os.environ['notebook_instance_name'],
+ "product": "datalab",
+ "computational_name": hdinsight_conf['computational_name'],
+ "project": hdinsight_conf['project_name'],
+ "endpoint": hdinsight_conf['endpoint_name']
+ }
+
+ hdinsight_conf['release_label'] = os.environ['hdinsight_version']
+ key = RSA.importKey(open(hdinsight_conf['key_path'], 'rb').read())
+ ssh_admin_pubkey = key.publickey().exportKey("OpenSSH").decode('UTF-8')
except Exception as err:
datalab.fab.append_result("Failed to generate variables dictionary.
Exception:" + str(err))
sys.exit(1)
try:
logging.info('[Creating HDInsight Cluster]')
- params = "--region {0} --bucket {1} --params
'{2}'".format(hdinsight_conf['region'],
-
hdinsight_conf['bucket_name'],
-
json.dumps(hdinsight_cluster))
+ params = "--resource_group_name {} --cluster_name {} " \
+ "--cluster_version {} --location {} " \
+ "--master_instance_type {} --worker_instance_type {} " \
+ "--worker_count {} --storage_account_name {} " \
+ "--storage_account_key {} --container_name {} " \
+ "--tags '{}' --public_key {}"\
+ .format(hdinsight_conf['resource_group_name'],
hdinsight_conf['cluster_name'],
+ hdinsight_conf['release_label'], hdinsight_conf['region'],
+ os.environ['hdinsight_master_instance_type'],
os.environ['hdinsight_slave_instance_type'],
+ os.environ['hdinsight_slave_count'],
hdinsight_conf['storage_account_name'],
+ hdinsight_conf['storage_account_key'],
hdinsight_conf['container_name'],
+ json.dumps(hdinsight_conf['cluster_tags']),
ssh_admin_pubkey)
try:
subprocess.run("~/scripts/{}.py
{}".format('dataengine-service_create', params), shell=True, check=True)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]