This is an automated email from the ASF dual-hosted git repository.

lfrolov pushed a commit to branch DATALAB-2691
in repository https://gitbox.apache.org/repos/asf/incubator-datalab.git


The following commit(s) were added to refs/heads/DATALAB-2691 by this push:
     new 7c3a7d4  [DATALAB-2691]: fixed python libs install for gcp dataproc on zeppelin
7c3a7d4 is described below

commit 7c3a7d4133b335bd2b7a6ca5963823345eb0d4d6
Author: leonidfrolov <[email protected]>
AuthorDate: Wed Mar 16 13:57:39 2022 +0200

    [DATALAB-2691]: fixed python libs install for gcp dataproc on zeppelin
---
 .../src/general/lib/gcp/actions_lib.py                       | 12 +++++-------
 .../gcp/zeppelin_dataengine-service_create_configs.py        | 11 ++++++++---
 .../gcp/zeppelin_install_dataengine-service_kernels.py       | 12 +++++++-----
 3 files changed, 20 insertions(+), 15 deletions(-)

diff --git a/infrastructure-provisioning/src/general/lib/gcp/actions_lib.py b/infrastructure-provisioning/src/general/lib/gcp/actions_lib.py
index 6bfc177..b0733b8 100644
--- a/infrastructure-provisioning/src/general/lib/gcp/actions_lib.py
+++ b/infrastructure-provisioning/src/general/lib/gcp/actions_lib.py
@@ -1340,7 +1340,7 @@ class GCPActions:
         except:
             sys.exit(1)
 
-    def install_python(self, bucket, user_name, cluster_name, application, numpy_version='1.14.3'):
+    def install_python(self, bucket, user_name, cluster_name, application, numpy_version):
         try:
             GCPActions().get_cluster_app_version(bucket, user_name, cluster_name, 'python')
             with open('/tmp/python_version') as f:
@@ -1355,11 +1355,9 @@ class GCPActions:
                 subprocess.run('sudo -i virtualenv /opt/python/python{}'.format(python_version), shell=True, check=True)
                 venv_command = 'source /opt/python/python{}/bin/activate'.format(python_version)
                 pip_command = '/opt/python/python{0}/bin/pip{1}'.format(python_version, python_version[:3])
-                subprocess.run('bash -c "{0} && sudo -i {1} install -U pip==9.0.3"'.format(venv_command, pip_command), shell=True, check=True)
-                subprocess.run('bash -c "{0} && sudo -i {1} install pyzmq==17.0.0"'.format(venv_command, pip_command), shell=True, check=True)
-                for lib in ['ipython', 'ipykernel', 'NumPy=={}'.format(numpy_version), 'boto', 'boto3', 'pybind11',
-                            'pythran', 'cython', 'SciPy', 'Matplotlib', 'pandas', 'Sympy', 'Pillow', 'sklearn']:
-                    subprocess.run('bash -c "{0} && sudo -i {1} install {2} --no-cache-dir"'
+                for lib in ['-U pip==9.0.3', 'pyzmq==17.0.0', 'ipython ipykernel boto boto3 pybind11 pythran cython NumPy=={} Matplotlib --no-cache-dir'.format(numpy_version),
+                            'SciPy pandas Sympy Pillow --no-cache-dir', 'sklearn --no-cache-dir']:
+                    subprocess.run('bash -c "{0} && sudo -i {1} install {2}"'
                                    .format(venv_command, pip_command, lib), shell=True, check=True)
                 if application == 'deeplearning':
                     subprocess.run('bash -c "{0} && sudo -i {1} install mxnet-cu80 opencv-python keras Theano --no-cache-dir"'.format(venv_command, pip_command), shell=True, check=True)
@@ -1410,7 +1408,7 @@ def get_cluster_python_version(region, bucket, user_name, cluster_name):
 
 def installing_python(region, bucket, user_name, cluster_name, application='', pip_mirror='', numpy_version='1.14.3'):
     try:
-        GCPActions().install_python(bucket, user_name, cluster_name, application)
+        GCPActions().install_python(bucket, user_name, cluster_name, application, numpy_version)
     except:
         sys.exit(1)
 
diff --git a/infrastructure-provisioning/src/general/scripts/gcp/zeppelin_dataengine-service_create_configs.py b/infrastructure-provisioning/src/general/scripts/gcp/zeppelin_dataengine-service_create_configs.py
index ea33688..380015c 100644
--- a/infrastructure-provisioning/src/general/scripts/gcp/zeppelin_dataengine-service_create_configs.py
+++ b/infrastructure-provisioning/src/general/scripts/gcp/zeppelin_dataengine-service_create_configs.py
@@ -36,6 +36,7 @@ parser.add_argument('--dry_run', type=str, default='false')
 parser.add_argument('--dataproc_version', type=str, default='')
 parser.add_argument('--spark_version', type=str, default='')
 parser.add_argument('--hadoop_version', type=str, default='')
+parser.add_argument('--numpy_version', type=str, default='')
 parser.add_argument('--region', type=str, default='')
 parser.add_argument('--user_name', type=str, default='')
 parser.add_argument('--os_user', type=str, default='')
@@ -80,7 +81,11 @@ if __name__ == "__main__":
         configuring_notebook(args.dataproc_version)
         if args.multiple_clusters == 'true':
             install_remote_livy(args)
-        installing_python(args.region, args.bucket, args.user_name, args.cluster_name, args.application, args.pip_mirror)
-        datalab.actions_lib.GCPActions().configure_zeppelin_dataproc_interpreter(args.dataproc_version, args.cluster_name, spark_dir, args.os_user,
-                                                                                 yarn_dir, args.bucket, args.user_name, args.multiple_clusters)
+        installing_python(args.region, args.bucket, args.user_name, args.cluster_name, args.application,
+                          args.pip_mirror, args.numpy_version)
+        datalab.actions_lib.GCPActions().configure_zeppelin_dataproc_interpreter(args.dataproc_version,
+                                                                                 args.cluster_name, spark_dir,
+                                                                                 args.os_user, yarn_dir,
+                                                                                 args.bucket, args.user_name,
+                                                                                 args.multiple_clusters)
         update_zeppelin_interpreters(args.multiple_clusters, args.r_enabled)
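
Taken together, the actions_lib.py and create_configs hunks above replace the old one-package-per-pip-call loop with a handful of batched installs, ordered so that NumPy and the build helpers (pybind11, pythran, cython) are already in the virtualenv before SciPy, pandas and friends are installed, and the NumPy pin now always comes from the caller instead of a hard-coded default. A minimal standalone sketch of that pattern follows; the virtualenv path, the pip binary path and the run_in_venv helper are illustrative placeholders, not DataLab APIs.

    # Sketch only: batched pip installs inside the cluster virtualenv.
    # The /opt/python/python3.8.12 paths and run_in_venv() are assumptions for illustration.
    import subprocess

    VENV_ACTIVATE = 'source /opt/python/python3.8.12/bin/activate'
    PIP = '/opt/python/python3.8.12/bin/pip3.8'

    def run_in_venv(pip_args):
        # One pip invocation inside the virtualenv, same shape as in actions_lib.py
        subprocess.run('bash -c "{0} && sudo -i {1} install {2}"'
                       .format(VENV_ACTIVATE, PIP, pip_args), shell=True, check=True)

    numpy_version = '1.14.3'  # example; now supplied by the caller rather than defaulted
    for batch in ['-U pip==9.0.3',
                  'pyzmq==17.0.0',
                  'ipython ipykernel boto boto3 pybind11 pythran cython NumPy=={} Matplotlib --no-cache-dir'.format(numpy_version),
                  'SciPy pandas Sympy Pillow --no-cache-dir',
                  'sklearn --no-cache-dir']:
        run_in_venv(batch)
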
diff --git a/infrastructure-provisioning/src/general/scripts/gcp/zeppelin_install_dataengine-service_kernels.py b/infrastructure-provisioning/src/general/scripts/gcp/zeppelin_install_dataengine-service_kernels.py
index 22936d4..7893d64 100644
--- a/infrastructure-provisioning/src/general/scripts/gcp/zeppelin_install_dataengine-service_kernels.py
+++ b/infrastructure-provisioning/src/general/scripts/gcp/zeppelin_install_dataengine-service_kernels.py
@@ -57,7 +57,8 @@ def configure_notebook(args):
     conn.sudo('chmod 755 /usr/local/bin/create_configs.py')
     conn.sudo('mkdir -p /usr/lib/python3.8/datalab/')
     conn.run('mkdir -p /home/{}/datalab_libs/'.format(args.os_user))
-    conn.local('rsync -e "ssh -i {0}" /usr/lib/python3.8/datalab/*.py {1}@{2}:/home/{1}/datalab_libs/'.format(args.keyfile, args.os_user, args.notebook_ip))
+    conn.local('rsync -e "ssh -i {0}" /usr/lib/python3.8/datalab/*.py {1}@{2}:/home/{1}/datalab_libs/'
+               .format(args.keyfile, args.os_user, args.notebook_ip))
     conn.run('chmod a+x /home/{}/datalab_libs/*'.format(args.os_user))
     conn.sudo('mv /home/{}/datalab_libs/* /usr/lib/python3.8/datalab/'.format(args.os_user))
     conn.sudo('rm -rf /home/{}/datalab_libs/'.format(args.os_user))
@@ -75,7 +76,8 @@ if __name__ == "__main__":
     hadoop_version = datalab.actions_lib.GCPActions().get_cluster_app_version(args.bucket, args.project_name, args.cluster_name, 'hadoop')
     conn.sudo('''bash -l -c 'echo "[global]" > /etc/pip.conf; echo "proxy = $(cat /etc/profile | grep proxy | head -n1 | cut -f2 -d=)" >> /etc/pip.conf' ''')
     conn.sudo('''bash -l -c 'echo "use_proxy=yes" > ~/.wgetrc; proxy=$(cat /etc/profile | grep proxy | head -n1 | cut -f2 -d=); echo "http_proxy=$proxy" >> ~/.wgetrc; echo "https_proxy=$proxy" >> ~/.wgetrc' ''')
-    conn.sudo('''bash -l -c 'unset http_proxy https_proxy; export gcp_project_id="{0}"; export conf_resource="{1}"; /usr/bin/python3 /usr/local/bin/create_configs.py --bucket {2} --cluster_name {3} --dataproc_version {4} --spark_version {5} --hadoop_version {6} --region {7} --user_name {8} --os_user {9} --application {10} --livy_version {11} --multiple_clusters {12} --r_enabled {13}' '''
-        .format(os.environ['gcp_project_id'], os.environ['conf_resource'], args.bucket, args.cluster_name, args.dataproc_version,
-                spark_version, hadoop_version, args.region, args.project_name, args.os_user, args.application,
-                os.environ['notebook_livy_version'], os.environ['notebook_multiple_clusters'], r_enabled))
\ No newline at end of file
+    conn.sudo('''bash -l -c 'unset http_proxy https_proxy; export gcp_project_id="{0}"; export conf_resource="{1}"; /usr/bin/python3 /usr/local/bin/create_configs.py --bucket {2} --cluster_name {3} --dataproc_version {4} --spark_version {5} --hadoop_version {6} --region {7} --user_name {8} --os_user {9} --application {10} --livy_version {11} --multiple_clusters {12} --r_enabled {13} --numpy_version {14}' '''
+        .format(os.environ['gcp_project_id'], os.environ['conf_resource'], args.bucket, args.cluster_name,
+                args.dataproc_version, spark_version, hadoop_version, args.region, args.project_name, args.os_user,
+                args.application, os.environ['notebook_livy_version'], os.environ['notebook_multiple_clusters'],
+                r_enabled, os.environ['notebook_numpy_version']))
\ No newline at end of file
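
For context, the new --numpy_version flag threads the pinned version end to end: zeppelin_install_dataengine-service_kernels.py reads it from the notebook_numpy_version environment variable and appends it to the remote create_configs.py call, create_configs.py forwards args.numpy_version to installing_python(), and GCPActions.install_python() finally uses it for the NumPy pin. A rough sketch of that chain is below; the literal version value is only an example, while the variable, flag and function names are taken from the diff above.

    # Rough end-to-end sketch of the --numpy_version plumbing added by this commit.
    import os

    # zeppelin_install_dataengine-service_kernels.py: read the notebook setting
    # (example value below) and hand it to create_configs.py as a CLI flag.
    numpy_version = os.environ.get('notebook_numpy_version', '1.14.3')
    create_configs_flag = '--numpy_version {}'.format(numpy_version)

    # zeppelin_dataengine-service_create_configs.py: argparse exposes it as
    # args.numpy_version and forwards it:
    #     installing_python(args.region, args.bucket, args.user_name, args.cluster_name,
    #                       args.application, args.pip_mirror, args.numpy_version)
    #
    # actions_lib.py: installing_python() now passes it through to
    # GCPActions().install_python(), which applies the 'NumPy=={}' pin instead of
    # silently falling back to the old hard-coded default.
    print(create_configs_flag)
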

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]
