This is an automated email from the ASF dual-hosted git repository.

lfrolov pushed a commit to branch DATALAB-1408
in repository https://gitbox.apache.org/repos/asf/incubator-datalab.git


The following commit(s) were added to refs/heads/DATALAB-1408 by this push:
     new dbec8bea7 [DATALAB-1408]: hdinsight creation code update
dbec8bea7 is described below

commit dbec8bea79c000d9fb8884f567b010217f0ee67f
Author: leonidfrolov <[email protected]>
AuthorDate: Thu Sep 1 15:29:26 2022 +0300

    [DATALAB-1408]: hdinsight creation code update
---
 .../src/general/lib/azure/actions_lib.py           |  57 ++++----
 .../scripts/azure/dataengine-service_create.py     | 143 +++++++++++++++++++
 .../scripts/azure/dataengine-service_prepare.py    | 156 ++++-----------------
 3 files changed, 198 insertions(+), 158 deletions(-)

diff --git a/infrastructure-provisioning/src/general/lib/azure/actions_lib.py b/infrastructure-provisioning/src/general/lib/azure/actions_lib.py
index cdd3dc8f9..563095bf4 100644
--- a/infrastructure-provisioning/src/general/lib/azure/actions_lib.py
+++ b/infrastructure-provisioning/src/general/lib/azure/actions_lib.py
@@ -1172,6 +1172,31 @@ class AzureActions:
                                    file=sys.stdout)}))
             traceback.print_exc(file=sys.stdout)
 
+    def create_hdinsight_cluster(self, resource_group_name, cluster_name, cluster_parameters):
+        try:
+            print('Starting to create HDInsight Spark cluster {}'.format(cluster_name))
+            return self.hdinsight_client.clusters.begin_create(resource_group_name, cluster_name, cluster_parameters)
+        except Exception as err:
+            logging.info(
+                "Unable to create HDInsight Spark cluster: " + str(err) + "\n Traceback: " + traceback.print_exc(file=sys.stdout))
+            append_result(str({"error": "Unable to create HDInsight Spark cluster",
+                               "error_message": str(err) + "\n Traceback: " + traceback.print_exc(
+                                   file=sys.stdout)}))
+            traceback.print_exc(file=sys.stdout)
+
+
+    def terminate_hdinsight_cluster(self, resource_group_name, cluster_name, cluster_parameters):
+        try:
+            print('Starting to terminate HDInsight Spark cluster {}'.format(cluster_name))
+            return self.hdinsight_client.clusters.begin_delete(resource_group_name, cluster_name, cluster_parameters)
+        except Exception as err:
+            logging.info(
+                "Unable to terminate HDInsight Spark cluster: " + str(err) + "\n Traceback: " + traceback.print_exc(file=sys.stdout))
+            append_result(str({"error": "Unable to terminate HDInsight Spark cluster",
+                               "error_message": str(err) + "\n Traceback: " + traceback.print_exc(
+                                   file=sys.stdout)}))
+            traceback.print_exc(file=sys.stdout)
+
 
 def ensure_local_jars(os_user, jars_dir):
     if not exists(datalab.fab.conn,'/home/{}/.ensure_dir/local_jars_ensured'.format(os_user)):
@@ -1500,35 +1525,3 @@ def find_des_jars(all_jars, des_path):
     except Exception as err:
         print('Error:', str(err))
         sys.exit(1)
-
-def create_hdinsight_cluster(resource_group_name, instance_name, cluster_parameters):
-    try:
-        azure_action = AzureActions()
-        client_1 = azure_action.hdinsight_client
-        print('Starting to create HDInsight Spark cluster {}'.format('hdinsight'))
-        result = client_1.clusters.begin_create(resource_group_name, instance_name, cluster_parameters)
-
-        return result
-    except Exception as err:
-        logging.info(
-            "Unable to create HDInsight Spark cluster: " + str(err) + "\n Traceback: " + traceback.print_exc(file=sys.stdout))
-        append_result(str({"error": "Unable to create HDInsight Spark cluster",
-                           "error_message": str(err) + "\n Traceback: " + traceback.print_exc(
-                               file=sys.stdout)}))
-        traceback.print_exc(file=sys.stdout)
-
-
-def terminate_hdinsight_cluster(resource_group_name, instance_name, cluster_parameters):
-    try:
-        azure_action = AzureActions()
-        client_1 = azure_action.hdinsight_client
-        print('Starting to terminate HDInsight Spark cluster {}'.format('hdinsight'))
-        client_1.clusters.begin_delete(resource_group_name, instance_name, cluster_parameters)
-
-    except Exception as err:
-        logging.info(
-            "Unable to terminate HDInsight Spark cluster: " + str(err) + "\n Traceback: " + traceback.print_exc(file=sys.stdout))
-        append_result(str({"error": "Unable to terminate HDInsight Spark cluster",
-                           "error_message": str(err) + "\n Traceback: " + traceback.print_exc(
-                               file=sys.stdout)}))
-        traceback.print_exc(file=sys.stdout)
\ No newline at end of file
diff --git a/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_create.py b/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_create.py
index e69de29bb..25ebfb34a 100644
--- a/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_create.py
+++ b/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_create.py
@@ -0,0 +1,143 @@
+#!/usr/bin/python3
+
+# *****************************************************************************
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# ******************************************************************************
+
+import argparse
+import json
+import sys
+from datalab.actions_lib import *
+from datalab.meta_lib import *
+from datalab.logger import logging
+from fabric import *
+from azure.mgmt.hdinsight.models import *
+from azure.mgmt.core import *
+from azure.common import *
+from azure.core import *
+
+parser = argparse.ArgumentParser()
+parser.add_argument('--resource_group_name', type=str, help='')
+parser.add_argument('--cluster_name', type=str, help='')
+parser.add_argument('--cluster_version', type=str, help='')
+parser.add_argument('--location', type=str, help='')
+parser.add_argument('--master_instance_type', type=str, help='')
+parser.add_argument('--worker_instance_type', type=str, help='')
+parser.add_argument('--worker_count', type=str, help='')
+parser.add_argument('--storage_account_name', type=str, help='')
+parser.add_argument('--storage_account_key', type=str, help='')
+parser.add_argument('--container_name', type=str, help='')
+parser.add_argument('--tags', type=str, help='')
+parser.add_argument('--public_key', type=str, help='')
+args = parser.parse_args()
+
+def build_hdinsight_cluster(resource_group_name, cluster_name, params):
+    logging.info("Will be created cluster: {}".format(cluster_name))
+    return datalab.actions_lib.AzureActions().create_hdinsight_cluster(resource_group_name, cluster_name, params)
+
+def create_cluster_parameters(location, tags, cluster_version, cluster_login_username, password, master_instance_type,
+                              worker_count, worker_instance_type, storage_account_name, storage_account_key,
+                              container_name, public_key):
+
+    # Returns cluster parameters
+
+    return ClusterCreateParametersExtended(
+        location=location,
+        tags=tags,
+        properties=ClusterCreateProperties(
+            cluster_version=cluster_version,
+            os_type=OSType.linux,
+            tier=Tier.standard,
+            cluster_definition=ClusterDefinition(
+                kind="Spark",
+                configurations={
+                    "gateway": {
+                        "restAuthCredential.isEnabled": "true",
+                        "restAuthCredential.username": cluster_login_username,
+                        "restAuthCredential.password": password
+                    }
+                }
+            ),
+            compute_profile=ComputeProfile(
+                roles=[
+                    Role(
+                        name="headnode",
+                        target_instance_count=2,
+                        hardware_profile=HardwareProfile(vm_size=master_instance_type),
+                        os_profile=OsProfile(
+                            linux_operating_system_profile=LinuxOperatingSystemProfile(
+                                username=cluster_login_username,
+                                ssh_profile={
+                                    "publicKeys": [
+                                        {"certificateData": public_key}
+                                    ]
+                                }
+                            )
+                        )
+                    ),
+                    Role(
+                        name="workernode",
+                        target_instance_count=int(worker_count),
+                        hardware_profile=HardwareProfile(vm_size=worker_instance_type),
+                        os_profile=OsProfile(
+                            linux_operating_system_profile=LinuxOperatingSystemProfile(
+                                username=cluster_login_username,
+                                ssh_profile={
+                                    "publicKeys": [
+                                        {"certificateData": public_key}
+                                    ]
+                                }
+                            )
+                        )
+                    )
+                ]
+            ),
+            storage_profile=StorageProfile(
+                storageaccounts=[
+                    StorageAccount(
+                        name=storage_account_name + ".blob.core.windows.net",
+                        key=storage_account_key,
+                        container=container_name.lower(),
+                        is_default=True
+                    )
+                ]
+            )
+        )
+    )
+
+##############
+# Run script #
+##############
+
+if __name__ == "__main__":
+    parser.print_help()
+
+    params = create_cluster_parameters(args.location, json.loads(args.tags), args.cluster_version, 'datalab-user',
+                                       args.password, args.master_instance_type, args.worker_count,
+                                       args.worker_instance_type, args.storage_account_name, args.storage_account_key,
+                                       args.container_name, args.public_key)
+    build_hdinsight_cluster(args.resource_group_name, args.cluster_name, params)
+
+    logfile = '{}_creation.log'.format(args.cluster_name)
+    logpath = '/response/' + logfile
+    out = open(logpath, 'w')
+    out.close()
+
+    sys.exit(0)
diff --git a/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_prepare.py b/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_prepare.py
index 7bd369910..e3724532d 100644
--- a/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_prepare.py
+++ b/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_prepare.py
@@ -34,102 +34,15 @@ import subprocess
 import Crypto.PublicKey
 import fabric
 import azure.mgmt.hdinsight.models
-#from Crypto.PublicKey import RSA
-#from fabric import *
 from azure.mgmt.hdinsight.models import *
 from azure.mgmt.core import *
 from azure.common import *
 from azure.core import *
 from datalab.actions_lib import *
 
-
-CLUSTER_NAME = 'hdinsight-test'
-# The name of your existing Resource Group
-RESOURCE_GROUP_NAME = 'dlab-resource-group'
-# Choose a region. i.e. "East US 2".
-LOCATION = 'West US 2'
-# Cluster login username
-CLUSTER_LOGIN_USER_NAME = 'datalab-user'
-# (SSH) user username
-SSH_USER_NAME = 'datalab-user'
-# Cluster admin password
-PASSWORD = ''
-# The name of blob storage account
-STORAGE_ACCOUNT_NAME = 'hdinsight'
-# Blob storage account key
-STORAGE_ACCOUNT_KEY = ''
-# Blob storage account container name
-CONTAINER_NAME = 'hdinsight'
-# Blob Storage endpoint suffix.
-BLOB_ENDPOINT_SUFFIX = '.blob.core.windows.net'
-
-def create_cluster_parameters(LOCATION, CLUSTER_LOGIN_USER_NAME, PASSWORD, SSH_USER_NAME):
-
-    # Returns cluster parameters
-
-    return ClusterCreateParametersExtended(
-        location=LOCATION,
-        tags={},
-        properties=ClusterCreateProperties(
-            cluster_version="4.0",
-            os_type=OSType.linux,
-            tier=Tier.standard,
-            cluster_definition=ClusterDefinition(
-                kind="Spark",
-                configurations={
-                    "gateway": {
-                        "restAuthCredential.isEnabled": "true",
-                        "restAuthCredential.username": CLUSTER_LOGIN_USER_NAME,
-                        "restAuthCredential.password": PASSWORD
-                    }
-                }
-            ),
-            compute_profile=ComputeProfile(
-                roles=[
-                    Role(
-                        name="headnode",
-                        target_instance_count=2,
-                        hardware_profile=HardwareProfile(vm_size="Standard_A4_v2"),
-                        os_profile=OsProfile(
-                            linux_operating_system_profile=LinuxOperatingSystemProfile(
-                                username=SSH_USER_NAME,
-                                password=PASSWORD
-                            )
-                        )
-                    ),
-                    Role(
-                        name="workernode",
-                        target_instance_count=2,
-                        hardware_profile=HardwareProfile(vm_size="Standard_A4_v2"),
-                        os_profile=OsProfile(
-                            linux_operating_system_profile=LinuxOperatingSystemProfile(
-                                username=SSH_USER_NAME,
-                                password=PASSWORD
-                            )
-                        )
-                    )
-                ]
-            ),
-            storage_profile=StorageProfile(
-                storageaccounts=[
-                    StorageAccount(
-                        name=STORAGE_ACCOUNT_NAME + ".blob.core.windows.net",
-                        key=STORAGE_ACCOUNT_KEY,
-                        container=CONTAINER_NAME.lower(),
-                        is_default=True
-                    )
-                ]
-            )
-        )
-    )
-
 if __name__ == "__main__":
-    #params = create_cluster_parameters()
-    #create_hdinsight_cluster(RESOURCE_GROUP_NAME,CLUSTER_NAME, params)
-    
     try:
         AzureMeta = datalab.meta_lib.AzureMeta()
-        AzureActions = datalab.actions_lib.AzureActions()
         logging.info('Generating infrastructure names and tags')
         hdinsight_conf = dict()
         if 'exploratory_name' in os.environ:
@@ -142,57 +55,48 @@ if __name__ == "__main__":
             hdinsight_conf['computational_name'] = ''
 
         hdinsight_conf['service_base_name'] = (os.environ['conf_service_base_name'])
-        hdinsight_conf['edge_user_name'] = (os.environ['edge_user_name'])
         hdinsight_conf['project_name'] = (os.environ['project_name']).replace('_', '-').lower()
-        hdinsight_conf['project_tag'] = hdinsight_conf['project_name']
         hdinsight_conf['endpoint_name'] = (os.environ['endpoint_name']).replace('_', '-').lower()
-        hdinsight_conf['endpoint_tag'] = hdinsight_conf['endpoint_name']
         hdinsight_conf['key_name'] = os.environ['conf_key_name']
         hdinsight_conf['key_path'] = '{0}{1}.pem'.format(os.environ['conf_key_dir'], os.environ['conf_key_name'])
-        hdinsight_conf['zone'] = os.environ['gcp_zone']
         hdinsight_conf['resource_group_name'] = os.environ['azure_resource_group_name']
         hdinsight_conf['region'] = os.environ['azure_region']
-        data_engine['vpc_name'] = os.environ['azure_vpc_name']
-        data_engine['private_subnet_name'] = 
'{}-{}-{}-subnet'.format(data_engine['service_base_name'],
-                                                                      
data_engine['project_name'],
-                                                                      
data_engine['endpoint_name'])
-        data_engine['private_subnet_cidr'] = 
AzureMeta.get_subnet(data_engine['resource_group_name'],
-                                                                  
data_engine['vpc_name'],
-                                                                  
data_engine['private_subnet_name']).address_prefix
-        data_engine['cluster_name'] = 
'{}-{}-{}-des-{}'.format(data_engine['service_base_name'],
-                                                              
data_engine['project_name'],
-                                                              
data_engine['endpoint_name'],
-                                                              
data_engine['computational_name'])
-
-
-
-        hdinsight_conf['subnet'] = 
'{0}-{1}-{2}-subnet'.format(hdinsight_conf['service_base_name'],
-                                                              
hdinsight_conf['project_name'],
-                                                              
hdinsight_conf['endpoint_name'])
-        hdinsight_conf['cluster_name'] = 
'{0}-{1}-{2}-des-{3}'.format(hdinsight_conf['service_base_name'],
-                                                                     
hdinsight_conf['project_name'],
-                                                                     
hdinsight_conf['endpoint_name'],
-                                                                     
hdinsight_conf['computational_name'])
-        hdinsight_conf['cluster_tag'] = 
'{0}-{1}-{2}-ps'.format(hdinsight_conf['service_base_name'],
-                                                               
hdinsight_conf['project_name'],
-                                                               
hdinsight_conf['endpoint_name'])
-        hdinsight_conf['bucket_name'] = 
'{0}-{1}-{2}-bucket'.format(hdinsight_conf['service_base_name'],
-                                                                   
hdinsight_conf['project_name'],
-                                                                   
hdinsight_conf['endpoint_name'])
-
-        hdinsight_conf['edge_instance_hostname'] = 
'{0}-{1}-{2}-edge'.format(hdinsight_conf['service_base_name'],
-                                                                            
hdinsight_conf['project_name'],
-                                                                            
hdinsight_conf['endpoint_name'])
-        hdinsight_conf['datalab_ssh_user'] = os.environ['conf_os_user']
+        hdinsight_conf['cluster_name'] = '{}-{}-{}-des-{}'.format(hdinsight_conf['service_base_name'],
+                                                                  hdinsight_conf['project_name'],
+                                                                  hdinsight_conf['endpoint_name'],
+                                                                  hdinsight_conf['computational_name'])
+
+        hdinsight_conf['cluster_tags'] = {
+            "name": hdinsight_conf['cluster_name'],
+            "sbn": hdinsight_conf['service_base_name'],
+            "notebook_name": os.environ['notebook_instance_name'],
+            "product": "datalab",
+            "computational_name": hdinsight_conf['computational_name'],
+            "project": hdinsight_conf['project_name'],
+            "endpoint": hdinsight_conf['endpoint_name']
+        }
+
+        hdinsight_conf['release_label'] = os.environ['hdinsight_version']
+        key = RSA.importKey(open(hdinsight_conf['key_path'], 'rb').read())
+        ssh_admin_pubkey = key.publickey().exportKey("OpenSSH").decode('UTF-8')
     except Exception as err:
         datalab.fab.append_result("Failed to generate variables dictionary. Exception:" + str(err))
         sys.exit(1)
 
     try:
         logging.info('[Creating HDInsight Cluster]')
-        params = "--region {0} --bucket {1} --params 
'{2}'".format(hdinsight_conf['region'],
-                                                                   
hdinsight_conf['bucket_name'],
-                                                                   
json.dumps(hdinsight_cluster))
+        params = "--resource_group_name {} --cluster_name {} " \
+                 "--cluster_version {} --location {} " \
+                 "--master_instance_type {} --worker_instance_type {} " \
+                 "--worker_count {} --storage_account_name {} " \
+                 "--storage_account_key {} --container_name {} " \
+                 "--tags '{}' --public_key {}"\
+            .format(hdinsight_conf['resource_group_name'], hdinsight_conf['cluster_name'],
+                    hdinsight_conf['release_label'], hdinsight_conf['region'],
+                    os.environ['hdinsight_master_instance_type'], os.environ['hdinsight_slave_instance_type'],
+                    os.environ['hdinsight_slave_count'], hdinsight_conf['storage_account_name'],
+                    hdinsight_conf['storage_account_key'], hdinsight_conf['container_name'],
+                    json.dumps(hdinsight_conf['cluster_tags']), ssh_admin_pubkey)
 
         try:
             subprocess.run("~/scripts/{}.py {}".format('dataengine-service_create', params), shell=True, check=True)


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to