This is an automated email from the ASF dual-hosted git repository.

mykolabodnar pushed a commit to branch DLAB-515
in repository https://gitbox.apache.org/repos/asf/incubator-dlab.git

The following commit(s) were added to refs/heads/DLAB-515 by this push: new 3521c5f [DLAB-515] - [AWS] EMR kernels connection via sparkmagic/livy implemented 3521c5f is described below commit 3521c5f9ae61eaeb98413e04a785efb0f920a354 Author: bodnarmykola <bodnarmyk...@gmail.com> AuthorDate: Mon Jul 13 11:40:17 2020 +0300 [DLAB-515] - [AWS] EMR kernels connection via sparkmagic/livy implemented --- .../src/general/files/aws/deeplearning_Dockerfile | 1 + .../src/general/files/aws/jupyter_Dockerfile | 1 + .../src/general/lib/os/debian/notebook_lib.py | 5 +++-- .../src/general/lib/os/fab.py | 4 ++-- .../aws/jupyter_dataengine-service_create_configs.py | 17 +++++++++++++++++ .../jupyter_install_dataengine-service_kernels.py | 7 ++++++- .../templates/os/sparkmagic_config_template.json | 20 ++++++++++++++++++++ 7 files changed, 50 insertions(+), 5 deletions(-) diff --git a/infrastructure-provisioning/src/general/files/aws/deeplearning_Dockerfile b/infrastructure-provisioning/src/general/files/aws/deeplearning_Dockerfile index fb6551f..587a1b6 100644 --- a/infrastructure-provisioning/src/general/files/aws/deeplearning_Dockerfile +++ b/infrastructure-provisioning/src/general/files/aws/deeplearning_Dockerfile @@ -42,6 +42,7 @@ COPY general/templates/os/inactive.service /root/templates/ COPY general/templates/os/inactive.timer /root/templates/ COPY general/files/os/toree-assembly-0.3.0.jar /root/files/ COPY general/files/os/toree_kernel.tar.gz /root/files/ +COPY general/templates/os/sparkmagic_config_template.json /root/templates/ COPY general/templates/os/pyspark_dataengine-service_template.json /root/templates/ COPY general/templates/os/r_dataengine-service_template.json /root/templates/ COPY general/templates/os/toree_dataengine-service_* /root/templates/ diff --git a/infrastructure-provisioning/src/general/files/aws/jupyter_Dockerfile b/infrastructure-provisioning/src/general/files/aws/jupyter_Dockerfile index 4c83fac..a2d6198 100644 --- 
a/infrastructure-provisioning/src/general/files/aws/jupyter_Dockerfile +++ b/infrastructure-provisioning/src/general/files/aws/jupyter_Dockerfile @@ -35,6 +35,7 @@ COPY general/templates/os/pyspark_local_template.json /root/templates/ COPY general/templates/os/py3spark_local_template.json /root/templates/ COPY general/templates/os/pyspark_dataengine-service_template.json /root/templates/ COPY general/templates/os/r_dataengine-service_template.json /root/templates/ +COPY general/templates/os/sparkmagic_config_template.json /root/templates/ COPY general/templates/os/r_template.json /root/templates/ COPY general/templates/os/run_template.sh /root/templates/ COPY general/templates/os/toree_dataengine-service_* /root/templates/ diff --git a/infrastructure-provisioning/src/general/lib/os/debian/notebook_lib.py b/infrastructure-provisioning/src/general/lib/os/debian/notebook_lib.py index 4e1efe1..7310d0d 100644 --- a/infrastructure-provisioning/src/general/lib/os/debian/notebook_lib.py +++ b/infrastructure-provisioning/src/general/lib/os/debian/notebook_lib.py @@ -268,12 +268,13 @@ def ensure_python3_libraries(os_user): try: #manage_pkg('-y install', 'remote', 'python3-setuptools') manage_pkg('-y install', 'remote', 'python3-pip') + manage_pkg('-y install', 'remote', 'libkrb5-dev') sudo('pip3 install setuptools=={}'.format(os.environ['notebook_setuptools_version'])) try: - sudo('pip3 install tornado=={0} ipython==7.9.0 ipykernel=={1} --no-cache-dir' \ + sudo('pip3 install tornado=={0} ipython==7.9.0 ipykernel=={1} sparkmagic --no-cache-dir' \ .format(os.environ['notebook_tornado_version'], os.environ['notebook_ipykernel_version'])) except: - sudo('pip3 install tornado=={0} ipython==5.0.0 ipykernel=={1} --no-cache-dir' \ + sudo('pip3 install tornado=={0} ipython==5.0.0 ipykernel=={1} sparkmagic --no-cache-dir' \ .format(os.environ['notebook_tornado_version'], os.environ['notebook_ipykernel_version'])) sudo('pip3 install -U pip=={} 
--no-cache-dir'.format(os.environ['conf_pip_version'])) sudo('pip3 install boto3 --no-cache-dir') diff --git a/infrastructure-provisioning/src/general/lib/os/fab.py b/infrastructure-provisioning/src/general/lib/os/fab.py index d664a5b..2acb5f8 100644 --- a/infrastructure-provisioning/src/general/lib/os/fab.py +++ b/infrastructure-provisioning/src/general/lib/os/fab.py @@ -180,9 +180,9 @@ def configure_jupyter(os_user, jupyter_conf_file, templates_dir, jupyter_version sudo("sed -i 's|OS_USR|{}|' /tmp/jupyter-notebook.service".format(os_user)) http_proxy = run('echo $http_proxy') https_proxy = run('echo $https_proxy') - sudo('sed -i \'/\[Service\]/ a\Environment=\"HTTP_PROXY={}\"\' /tmp/jupyter-notebook.service'.format( + #sudo('sed -i \'/\[Service\]/ a\Environment=\"HTTP_PROXY={}\"\' /tmp/jupyter-notebook.service'.format( http_proxy)) - sudo('sed -i \'/\[Service\]/ a\Environment=\"HTTPS_PROXY={}\"\' /tmp/jupyter-notebook.service'.format( + #sudo('sed -i \'/\[Service\]/ a\Environment=\"HTTPS_PROXY={}\"\' /tmp/jupyter-notebook.service'.format( https_proxy)) java_home = run("update-alternatives --query java | grep -o \'/.*/java-8.*/jre\'").splitlines()[0] sudo('sed -i \'/\[Service\]/ a\Environment=\"JAVA_HOME={}\"\' /tmp/jupyter-notebook.service'.format( diff --git a/infrastructure-provisioning/src/general/scripts/aws/jupyter_dataengine-service_create_configs.py b/infrastructure-provisioning/src/general/scripts/aws/jupyter_dataengine-service_create_configs.py index ed3daee..46cda9c 100644 --- a/infrastructure-provisioning/src/general/scripts/aws/jupyter_dataengine-service_create_configs.py +++ b/infrastructure-provisioning/src/general/scripts/aws/jupyter_dataengine-service_create_configs.py @@ -52,6 +52,7 @@ parser.add_argument('--os_user', type=str, default='') parser.add_argument('--pip_mirror', type=str, default='') parser.add_argument('--numpy_version', type=str, default='') parser.add_argument('--application', type=str, default='') 
+parser.add_argument('--master_ip', type=str, default='') args = parser.parse_args() emr_dir = '/opt/' + args.emr_version + '/jars/' @@ -162,11 +163,27 @@ def add_breeze_library_emr(args): local(""" sudo bash -c "sed -i '/spark.driver.extraClassPath/s/$/:\/opt\/""" + args.emr_version + """\/jars\/usr\/other\/*/' """ + spark_defaults_path + """" """) +def install_sparkamagic_kernels(args): + try: + local('sudo jupyter nbextension enable --py --sys-prefix widgetsnbextension') + sparkmagic_dir = local("sudo pip3 show sparkmagic | grep 'Location: ' | awk '{print $2}'", capture=True) + local('sudo jupyter-kernelspec install {}/sparkmagic/kernels/sparkkernel --user'.format(sparkmagic_dir)) + local('sudo jupyter-kernelspec install {}/sparkmagic/kernels/pysparkkernel --user'.format(sparkmagic_dir)) + local('sudo jupyter-kernelspec install {}/sparkmagic/kernels/sparkrkernel --user'.format(sparkmagic_dir)) + local('mkdir -p /home/' + args.os_user + '/.sparkmagic') + local('cp -f /tmp/sparkmagic_config_template.json /home/' + args.os_user + '/.sparkmagic/config.json') + local('sed -i \'s|LIVY_HOST|{0}|g\' /home/{1}/.sparkmagic/config.json'.format( + args.master_ip, args.os_user)) + except: + sys.exit(1) + + if __name__ == "__main__": if args.dry_run == 'true': parser.print_help() else: + install_sparkamagic_kernels(args) result = prepare(emr_dir, yarn_dir) if result == False : jars(args, emr_dir) diff --git a/infrastructure-provisioning/src/general/scripts/aws/jupyter_install_dataengine-service_kernels.py b/infrastructure-provisioning/src/general/scripts/aws/jupyter_install_dataengine-service_kernels.py index fb29f0a..0b82ce1 100644 --- a/infrastructure-provisioning/src/general/scripts/aws/jupyter_install_dataengine-service_kernels.py +++ b/infrastructure-provisioning/src/general/scripts/aws/jupyter_install_dataengine-service_kernels.py @@ -50,6 +50,7 @@ def configure_notebook(args): templates_dir = '/root/templates/' files_dir = '/root/files/' scripts_dir = '/root/scripts/' 
+ put(templates_dir + 'sparkmagic_config_template.json', '/tmp/sparkmagic_config_template.json') put(templates_dir + 'pyspark_dataengine-service_template.json', '/tmp/pyspark_dataengine-service_template.json') put(templates_dir + 'r_dataengine-service_template.json', '/tmp/r_dataengine-service_template.json') put(templates_dir + 'toree_dataengine-service_template.json','/tmp/toree_dataengine-service_template.json') @@ -92,9 +93,13 @@ if __name__ == "__main__": print(r_version) else: r_version = 'false' + cluster_id = get_emr_id_by_name(args.cluster_name) + master_instances = get_emr_instances_list(cluster_id, 'MASTER') + master_ip = master_instances[0].get('PrivateIpAddress') sudo("/usr/bin/python /usr/local/bin/jupyter_dataengine-service_create_configs.py --bucket " + args.bucket + " --cluster_name " + args.cluster_name + " --emr_version " + args.emr_version + " --spark_version " + spark_version + " --scala_version " + scala_version + " --r_version " + r_version + " --hadoop_version " + hadoop_version + " --region " + args.region + " --excluded_lines '" + args.emr_excluded_spark_properties + "' --project_name " + args.project_name + " --os_user " + args.os_user + " --pip_mirror " - + args.pip_mirror + " --numpy_version " + numpy_version + " --application " + args.application) + + args.pip_mirror + " --numpy_version " + numpy_version + " --application " + + args.application + " --master_ip " + master_ip) diff --git a/infrastructure-provisioning/src/general/templates/os/sparkmagic_config_template.json b/infrastructure-provisioning/src/general/templates/os/sparkmagic_config_template.json new file mode 100644 index 0000000..e6fa8ef --- /dev/null +++ b/infrastructure-provisioning/src/general/templates/os/sparkmagic_config_template.json @@ -0,0 +1,20 @@ +{ + "kernel_python_credentials" : { + "username": "", + "password": "", + "url": "http://LIVY_HOST:8998", + "auth": "None" + }, + "kernel_scala_credentials" : { + "username": "", + "password": "", + "url": 
"http://LIVY_HOST:8998", + "auth": "None" + }, + "kernel_r_credentials": { + "username": "", + "password": "", + "url": "http://LIVY_HOST:8998", + "auth": "None" + } +} \ No newline at end of file --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@dlab.apache.org For additional commands, e-mail: commits-h...@dlab.apache.org