This is an automated email from the ASF dual-hosted git repository.

mykolabodnar pushed a commit to branch DATALAB-2398
in repository https://gitbox.apache.org/repos/asf/incubator-datalab.git


The following commit(s) were added to refs/heads/DATALAB-2398 by this push:
     new c7669f2  [DATALAB-2398] - [Zeppelin] python specific version via venv 
usage implemented
c7669f2 is described below

commit c7669f2b1a41bf7bffb080872d6a7f63875e2ad1
Author: bodnarmykola <[email protected]>
AuthorDate: Mon Jun 7 16:02:24 2021 +0300

    [DATALAB-2398] - [Zeppelin] python specific version via venv usage 
implemented
---
 .../general/templates/gcp/interpreter_spark.json   | 114 ++-------------------
 .../zeppelin/scripts/configure_zeppelin_node.py    |  15 ++-
 2 files changed, 18 insertions(+), 111 deletions(-)

diff --git 
a/infrastructure-provisioning/src/general/templates/gcp/interpreter_spark.json 
b/infrastructure-provisioning/src/general/templates/gcp/interpreter_spark.json
index 17e979d..1e503dd 100644
--- 
a/infrastructure-provisioning/src/general/templates/gcp/interpreter_spark.json
+++ 
b/infrastructure-provisioning/src/general/templates/gcp/interpreter_spark.json
@@ -1,112 +1,5 @@
 {
   "interpreterSettings": {
-    "2C6RJRBD1": {
-      "id": "2C6RJRBD1",
-      "name": "local_interpreter_python2",
-      "group": "spark",
-      "properties": {
-        "zeppelin.spark.printREPLOutput": {
-            "propertyName": "zeppelin.spark.printREPLOutput",
-            "value": "true",
-            "description": "Print REPL output",
-            "type": "checkbox"
-          },
-        "zeppelin.dep.additionalRemoteRepository": {
-            "envName": "ZEPPELIN_DEP_ADDITIONALREMOTEREPOSITORY",
-            "propertyName": "zeppelin.dep.additionalRemoteRepository",
-            "value": 
"spark-packages,http://dl.bintray.com/spark-packages/maven,false;";,
-            "description": "",
-            "type": "string"
-          },
-        "zeppelin.spark.sql.stacktrace": {
-            "envName": "ZEPPELIN_SPARK_SQL_STACKTRACE",
-            "propertyName": "zeppelin.spark.sql.stacktrace",
-            "value": "false",
-            "description": "",
-            "type": "checkbox"
-          },
-        "zeppelin.spark.importImplicit":{
-            "envName": "ZEPPELIN_SPARK_IMPORTIMPLICIT",
-            "propertyName": "zeppelin.spark.importImplicit",
-            "value": "true",
-            "description": "",
-            "type": "checkbox"
-          },
-        "zeppelin.spark.concurrentSQL": {
-            "envName": "ZEPPELIN_SPARK_CONCURRENTSQL",
-            "propertyName": "zeppelin.spark.concurrentSQL",
-            "value": "false",
-            "description": "",
-            "type": "checkbox"
-          },
-        "zeppelin.spark.useHiveContext": {
-            "envName": "ZEPPELIN_SPARK_USEHIVECONTEXT",
-            "propertyName": "zeppelin.spark.useHiveContext",
-            "value": "true",
-            "description": "Use HiveContext instead of SQLContext if it is 
true.",
-            "type": "checkbox"
-          },
-        "zeppelin.pyspark.python": {
-            "envName": "ZEPPELIN_PYSPARK_PYTHON",
-            "propertyName": "zeppelin.pyspark.python",
-            "value": "python",
-            "description": "",
-            "type": "string"
-          },
-        "zeppelin.dep.localrepo": {
-            "envName": "ZEPPELIN_DEP_LOCALREPO",
-            "propertyName": "zeppelin.dep.localrepo",
-            "value": "local-repo",
-            "description": "",
-            "type": "string"
-          },
-        "zeppelin.spark.maxResult": {
-            "envName": "ZEPPELIN_SPARK_MAXRESULT",
-            "propertyName": "zeppelin.spark.maxResult",
-            "value": "1000",
-            "description": "Max number of Spark SQL result to display.",
-            "type": "number"
-          },
-        "master":{
-            "envName": "Master",
-            "propertyName": "spark.master",
-            "value": "local[*]",
-            "description": "Spark master uri. ex) spark://masterhost:7077",
-            "type": "string"
-          },
-        "spark.app.name": {
-            "envName": "SPARK_APP_NAME",
-            "propertyName": "spark.app.name",
-            "value": "Zeppelin",
-            "description": "The name of spark application.",
-            "type": "string"
-          },
-        "spark.driver.memory": {
-              "envName": "MEMORY_DRIVER",
-              "propertyName": "spark.driver.memory",
-              "value": "DRIVER_MEMORY",
-              "description": "",
-              "type": "string"
-          }
-      },
-      "interpreterGroup": [
-        {
-          "class": "org.apache.zeppelin.spark.SparkInterpreter",
-          "name": "spark"
-        },
-        {
-          "class": "org.apache.zeppelin.spark.PySparkInterpreter",
-          "name": "pyspark"
-        }
-      ],
-      "dependencies": [],
-      "option": {
-        "remote": true,
-        "perNoteSession": false,
-        "perNoteProcess": false,
-        "isExistingProcess": false
-      }
-    },
     "2C6RJRBD2": {
       "id": "2C6RJRBD2",
       "name": "local_interpreter_python3",
@@ -201,6 +94,13 @@
               "value": "DRIVER_MEMORY",
               "description": "",
               "type": "string"
+          },
+        "spark.pyspark.python": {
+              "envName": "PYSPARK_PYTHON",
+              "propertyName": "spark.pyspark.python",
+              "value": "PYTHON_VENV_PATH",
+              "description": "",
+              "type": "string"
           }
       },
       "interpreterGroup": [
diff --git 
a/infrastructure-provisioning/src/zeppelin/scripts/configure_zeppelin_node.py 
b/infrastructure-provisioning/src/zeppelin/scripts/configure_zeppelin_node.py
index 33a3c8b..fc4202e 100644
--- 
a/infrastructure-provisioning/src/zeppelin/scripts/configure_zeppelin_node.py
+++ 
b/infrastructure-provisioning/src/zeppelin/scripts/configure_zeppelin_node.py
@@ -58,6 +58,8 @@ spark_version = args.spark_version
 hadoop_version = args.hadoop_version
 scala_link = "https://www.scala-lang.org/files/archive/";
 zeppelin_version = args.zeppelin_version
+python_venv_version = os.environ['notebook_python_venv_version']
+python_venv_path = 
'/opt/python/python{0}/bin/python{1}'.format(python_venv_version, 
python_venv_version[:3])
 zeppelin_link = "https://archive.apache.org/dist/zeppelin/zeppelin-"; + 
zeppelin_version + "/zeppelin-" + \
                 zeppelin_version + "-bin-netinst.tgz"
 if args.region == 'cn-north-1':
@@ -161,13 +163,14 @@ def configure_local_livy_kernels(args):
     conn.sudo("systemctl start zeppelin-notebook")
 
 
-def configure_local_spark_kernels(args):
+def configure_local_spark_kernels(args, python_venv_path):
     if not exists(conn,'/home/' + args.os_user + 
'/.ensure_dir/local_spark_kernel_ensured'):
         conn.put(templates_dir + 'interpreter_spark.json', 
'/tmp/interpreter.json')
         conn.sudo('sed -i "s|ENDPOINTURL|' + args.endpoint_url + '|g" 
/tmp/interpreter.json')
         conn.sudo('sed -i "s|OS_USER|' + args.os_user + '|g" 
/tmp/interpreter.json')
         spark_memory = get_spark_memory()
         conn.sudo('sed -i "s|DRIVER_MEMORY|{}m|g" 
/tmp/interpreter.json'.format(spark_memory))
+        conn.sudo('sed -i "s|PYTHON_VENV_PATH|{}m|g" 
/tmp/interpreter.json'.format(python_venv_path))
         update_zeppelin_interpreters(args.multiple_clusters, r_enabled, 
'local')
         conn.sudo('cp -f /tmp/interpreter.json 
/opt/zeppelin/conf/interpreter.json')
         conn.sudo('chown ' + args.os_user + ':' + args.os_user + ' -R 
/opt/zeppelin/')
@@ -226,8 +229,12 @@ if __name__ == "__main__":
         ensure_r(args.os_user, r_libs, args.region, args.r_mirror)
     print("Install Python 3 modules")
     ensure_python3_libraries(args.os_user)
-    print("Install Python 3 specific version")
-    ensure_python3_specific_version(python3_version, args.os_user)
+
+    # INSTALL PYTHON IN VIRTUALENV
+    print("Configure Python Virtualenv")
+    ensure_python_venv(python_venv_version)
+    #print("Install Python 3 specific version")
+    #ensure_python3_specific_version(python3_version, args.os_user)
 
     # INSTALL SPARK AND CLOUD STORAGE JARS FOR SPARK
     print("Install local Spark")
@@ -249,7 +256,7 @@ if __name__ == "__main__":
         configure_local_livy_kernels(args)
     else:
         print("Configuring local kernels")
-        configure_local_spark_kernels(args)
+        configure_local_spark_kernels(args, python_venv_path)
 
     # INSTALL UNGIT
     print("Install nodejs")

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to