This is an automated email from the ASF dual-hosted git repository. lfrolov pushed a commit to branch DATALAB-2609 in repository https://gitbox.apache.org/repos/asf/incubator-datalab.git
commit 5315224117c3218119ec48ed2d0c4ef1b06d4cc3 Author: leonidfrolov <[email protected]> AuthorDate: Fri Nov 19 16:34:24 2021 +0200 [DATALAB-2609]: added azure vmAgent status check --- .../src/general/lib/azure/actions_lib.py | 12 ++++++++++++ .../src/general/lib/azure/meta_lib.py | 20 ++++++++++++++++++++ .../scripts/azure/common_create_notebook_image.py | 11 ++++++++--- .../general/scripts/azure/deeplearning_configure.py | 11 ++++++++--- .../src/general/scripts/azure/jupyter_configure.py | 11 ++++++++--- .../general/scripts/azure/jupyterlab_configure.py | 11 ++++++++--- .../src/general/scripts/azure/rstudio_configure.py | 11 ++++++++--- .../src/general/scripts/azure/tensor_configure.py | 11 ++++++++--- .../src/general/scripts/azure/zeppelin_configure.py | 11 ++++++++--- 9 files changed, 88 insertions(+), 21 deletions(-) diff --git a/infrastructure-provisioning/src/general/lib/azure/actions_lib.py b/infrastructure-provisioning/src/general/lib/azure/actions_lib.py index 09ec650..063d1ef 100644 --- a/infrastructure-provisioning/src/general/lib/azure/actions_lib.py +++ b/infrastructure-provisioning/src/general/lib/azure/actions_lib.py @@ -839,6 +839,18 @@ class AzureActions: file=sys.stdout)})) traceback.print_exc(file=sys.stdout) + def restart_instance(self, resource_group_name, instance_name): + try: + result = self.compute_client.virtual_machines.restart(resource_group_name, instance_name).wait() + return result + except Exception as err: + logging.info( + "Unable to restart instance: " + str(err) + "\n Traceback: " + traceback.print_exc(file=sys.stdout)) + append_result(str({"error": "Unable to restart instance", + "error_message": str(err) + "\n Traceback: " + traceback.print_exc( + file=sys.stdout)})) + traceback.print_exc(file=sys.stdout) + def set_tag_to_instance(self, resource_group_name, instance_name, tags): try: instance_parameters = self.compute_client.virtual_machines.get(resource_group_name, instance_name) diff --git a/infrastructure-provisioning/src/general/lib/azure/meta_lib.py b/infrastructure-provisioning/src/general/lib/azure/meta_lib.py index 510e875..4003b1f 100644 --- a/infrastructure-provisioning/src/general/lib/azure/meta_lib.py +++ b/infrastructure-provisioning/src/general/lib/azure/meta_lib.py @@ -178,6 +178,26 @@ class AzureMeta: file=sys.stdout)})) traceback.print_exc(file=sys.stdout) + def get_instance_vmagent_status(self, resource_group_name, instance_name): + try: + result = False + display_status = self.compute_client.virtual_machines.instance_view(resource_group_name, instance_name)['vmAgent']['statuses'][0]['displayStatus'] + print('===========1') + print(display_status) + if 'Not Ready' not in display_status: + result = True + return result + except AzureExceptions.CloudError as err: + if err.status_code == 404: + return '' + except Exception as err: + logging.info( + "Unable to view instance: " + str(err) + "\n Traceback: " + traceback.print_exc(file=sys.stdout)) + append_result(str({"error": "Unable to view instance", + "error_message": str(err) + "\n Traceback: " + traceback.print_exc( + file=sys.stdout)})) + traceback.print_exc(file=sys.stdout) + def get_instances_name_by_tag(self, resource_group_name, tag, value): try: list = [] diff --git a/infrastructure-provisioning/src/general/scripts/azure/common_create_notebook_image.py b/infrastructure-provisioning/src/general/scripts/azure/common_create_notebook_image.py index a3cddbd..1e28bed 100644 --- a/infrastructure-provisioning/src/general/scripts/azure/common_create_notebook_image.py +++ b/infrastructure-provisioning/src/general/scripts/azure/common_create_notebook_image.py @@ -90,9 +90,14 @@ if __name__ == "__main__": subprocess.run("~/scripts/{}.py".format('common_prepare_notebook'), shell=True, check=True) instance_running = False while not instance_running: - if AzureMeta.get_instance_status(image_conf['resource_group_name'], - image_conf['instance_name']) == 'running': - instance_running = True + if AzureMeta.get_instance_status( + notebook_config['resource_group_name'], notebook_config['instance_name']) == 'running': + if not AzureMeta.get_instance_vmagent_status( + notebook_config['resource_group_name'], notebook_config['instance_name']): + AzureActions.restart_instance( + notebook_config['resource_group_name'], notebook_config['instance_name']) + else: + instance_running = True instance_hostname = AzureMeta.get_private_ip_address(image_conf['resource_group_name'], image_conf['instance_name']) datalab.actions_lib.remount_azure_disk(True, image_conf['datalab_ssh_user'], instance_hostname, diff --git a/infrastructure-provisioning/src/general/scripts/azure/deeplearning_configure.py b/infrastructure-provisioning/src/general/scripts/azure/deeplearning_configure.py index 1849e4b..29e32de 100644 --- a/infrastructure-provisioning/src/general/scripts/azure/deeplearning_configure.py +++ b/infrastructure-provisioning/src/general/scripts/azure/deeplearning_configure.py @@ -264,9 +264,14 @@ if __name__ == "__main__": subprocess.run("~/scripts/{}.py".format('common_prepare_notebook'), shell=True, check=True) instance_running = False while not instance_running: - if AzureMeta.get_instance_status(notebook_config['resource_group_name'], - notebook_config['instance_name']) == 'running': - instance_running = True + if AzureMeta.get_instance_status( + notebook_config['resource_group_name'], notebook_config['instance_name']) == 'running': + if not AzureMeta.get_instance_vmagent_status( + notebook_config['resource_group_name'], notebook_config['instance_name']): + AzureActions.restart_instance( + notebook_config['resource_group_name'], notebook_config['instance_name']) + else: + instance_running = True instance_hostname = AzureMeta.get_private_ip_address(notebook_config['resource_group_name'], notebook_config['instance_name']) datalab.actions_lib.remount_azure_disk(True, notebook_config['datalab_ssh_user'], instance_hostname, diff --git a/infrastructure-provisioning/src/general/scripts/azure/jupyter_configure.py b/infrastructure-provisioning/src/general/scripts/azure/jupyter_configure.py index 6f42803..76a9e8d 100644 --- a/infrastructure-provisioning/src/general/scripts/azure/jupyter_configure.py +++ b/infrastructure-provisioning/src/general/scripts/azure/jupyter_configure.py @@ -267,9 +267,14 @@ if __name__ == "__main__": subprocess.run("~/scripts/{}.py".format('common_prepare_notebook'), shell=True, check=True) instance_running = False while not instance_running: - if AzureMeta.get_instance_status(notebook_config['resource_group_name'], - notebook_config['instance_name']) == 'running': - instance_running = True + if AzureMeta.get_instance_status( + notebook_config['resource_group_name'], notebook_config['instance_name']) == 'running': + if not AzureMeta.get_instance_vmagent_status( + notebook_config['resource_group_name'], notebook_config['instance_name']): + AzureActions.restart_instance( + notebook_config['resource_group_name'], notebook_config['instance_name']) + else: + instance_running = True instance_hostname = AzureMeta.get_private_ip_address(notebook_config['resource_group_name'], notebook_config['instance_name']) datalab.actions_lib.remount_azure_disk(True, notebook_config['datalab_ssh_user'], instance_hostname, diff --git a/infrastructure-provisioning/src/general/scripts/azure/jupyterlab_configure.py b/infrastructure-provisioning/src/general/scripts/azure/jupyterlab_configure.py index 077f662..6d38ec6 100644 --- a/infrastructure-provisioning/src/general/scripts/azure/jupyterlab_configure.py +++ b/infrastructure-provisioning/src/general/scripts/azure/jupyterlab_configure.py @@ -249,9 +249,14 @@ if __name__ == "__main__": subprocess.run("~/scripts/{}.py".format('common_prepare_notebook'), shell=True, check=True) instance_running = False while not instance_running: - if AzureMeta.get_instance_status(notebook_config['resource_group_name'], - notebook_config['instance_name']) == 'running': - instance_running = True + if AzureMeta.get_instance_status( + notebook_config['resource_group_name'], notebook_config['instance_name']) == 'running': + if not AzureMeta.get_instance_vmagent_status( + notebook_config['resource_group_name'], notebook_config['instance_name']): + AzureActions.restart_instance( + notebook_config['resource_group_name'], notebook_config['instance_name']) + else: + instance_running = True instance_hostname = AzureMeta.get_private_ip_address(notebook_config['resource_group_name'], notebook_config['instance_name']) datalab.actions_lib.remount_azure_disk(True, notebook_config['datalab_ssh_user'], instance_hostname, diff --git a/infrastructure-provisioning/src/general/scripts/azure/rstudio_configure.py b/infrastructure-provisioning/src/general/scripts/azure/rstudio_configure.py index b1e92c5..4dd5ff8 100644 --- a/infrastructure-provisioning/src/general/scripts/azure/rstudio_configure.py +++ b/infrastructure-provisioning/src/general/scripts/azure/rstudio_configure.py @@ -266,9 +266,14 @@ if __name__ == "__main__": subprocess.run("~/scripts/{}.py".format('common_prepare_notebook'), shell=True, check=True) instance_running = False while not instance_running: - if AzureMeta.get_instance_status(notebook_config['resource_group_name'], - notebook_config['instance_name']) == 'running': - instance_running = True + if AzureMeta.get_instance_status( + notebook_config['resource_group_name'], notebook_config['instance_name']) == 'running': + if not AzureMeta.get_instance_vmagent_status( + notebook_config['resource_group_name'], notebook_config['instance_name']): + AzureActions.restart_instance( + notebook_config['resource_group_name'], notebook_config['instance_name']) + else: + instance_running = True instance_hostname = AzureMeta.get_private_ip_address(notebook_config['resource_group_name'], notebook_config['instance_name']) datalab.actions_lib.remount_azure_disk(True, notebook_config['datalab_ssh_user'], instance_hostname, diff --git a/infrastructure-provisioning/src/general/scripts/azure/tensor_configure.py b/infrastructure-provisioning/src/general/scripts/azure/tensor_configure.py index 4ba5015..ebccf04 100644 --- a/infrastructure-provisioning/src/general/scripts/azure/tensor_configure.py +++ b/infrastructure-provisioning/src/general/scripts/azure/tensor_configure.py @@ -262,9 +262,14 @@ if __name__ == "__main__": subprocess.run("~/scripts/{}.py".format('common_prepare_notebook'), shell=True, check=True) instance_running = False while not instance_running: - if AzureMeta.get_instance_status(notebook_config['resource_group_name'], - notebook_config['instance_name']) == 'running': - instance_running = True + if AzureMeta.get_instance_status( + notebook_config['resource_group_name'], notebook_config['instance_name']) == 'running': + if not AzureMeta.get_instance_vmagent_status( + notebook_config['resource_group_name'], notebook_config['instance_name']): + AzureActions.restart_instance( + notebook_config['resource_group_name'], notebook_config['instance_name']) + else: + instance_running = True instance_hostname = AzureMeta.get_private_ip_address(notebook_config['resource_group_name'], notebook_config['instance_name']) datalab.actions_lib.remount_azure_disk(True, notebook_config['datalab_ssh_user'], instance_hostname, diff --git a/infrastructure-provisioning/src/general/scripts/azure/zeppelin_configure.py b/infrastructure-provisioning/src/general/scripts/azure/zeppelin_configure.py index 21d3902..4c11f41 100644 --- a/infrastructure-provisioning/src/general/scripts/azure/zeppelin_configure.py +++ b/infrastructure-provisioning/src/general/scripts/azure/zeppelin_configure.py @@ -277,9 +277,14 @@ if __name__ == "__main__": subprocess.run("~/scripts/{}.py".format('common_prepare_notebook'), shell=True, check=True) instance_running = False while not instance_running: - if AzureMeta.get_instance_status(notebook_config['resource_group_name'], - notebook_config['instance_name']) == 'running': - instance_running = True + if AzureMeta.get_instance_status( + notebook_config['resource_group_name'], notebook_config['instance_name']) == 'running': + if not AzureMeta.get_instance_vmagent_status( + notebook_config['resource_group_name'], notebook_config['instance_name']): + AzureActions.restart_instance( + notebook_config['resource_group_name'], notebook_config['instance_name']) + else: + instance_running = True instance_hostname = AzureMeta.get_private_ip_address(notebook_config['resource_group_name'], notebook_config['instance_name']) datalab.actions_lib.remount_azure_disk(True, notebook_config['datalab_ssh_user'], instance_hostname, --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
