This is an automated email from the ASF dual-hosted git repository.
mykolabodnar pushed a commit to branch DATALAB-2398
in repository https://gitbox.apache.org/repos/asf/incubator-datalab.git
The following commit(s) were added to refs/heads/DATALAB-2398 by this push:
new c7669f2 [DATALAB-2398] - [Zeppelin] python specific version via venv
usage implemented
c7669f2 is described below
commit c7669f2b1a41bf7bffb080872d6a7f63875e2ad1
Author: bodnarmykola <[email protected]>
AuthorDate: Mon Jun 7 16:02:24 2021 +0300
[DATALAB-2398] - [Zeppelin] python specific version via venv usage
implemented
---
.../general/templates/gcp/interpreter_spark.json | 114 ++-------------------
.../zeppelin/scripts/configure_zeppelin_node.py | 15 ++-
2 files changed, 18 insertions(+), 111 deletions(-)
diff --git
a/infrastructure-provisioning/src/general/templates/gcp/interpreter_spark.json
b/infrastructure-provisioning/src/general/templates/gcp/interpreter_spark.json
index 17e979d..1e503dd 100644
---
a/infrastructure-provisioning/src/general/templates/gcp/interpreter_spark.json
+++
b/infrastructure-provisioning/src/general/templates/gcp/interpreter_spark.json
@@ -1,112 +1,5 @@
{
"interpreterSettings": {
- "2C6RJRBD1": {
- "id": "2C6RJRBD1",
- "name": "local_interpreter_python2",
- "group": "spark",
- "properties": {
- "zeppelin.spark.printREPLOutput": {
- "propertyName": "zeppelin.spark.printREPLOutput",
- "value": "true",
- "description": "Print REPL output",
- "type": "checkbox"
- },
- "zeppelin.dep.additionalRemoteRepository": {
- "envName": "ZEPPELIN_DEP_ADDITIONALREMOTEREPOSITORY",
- "propertyName": "zeppelin.dep.additionalRemoteRepository",
- "value":
"spark-packages,http://dl.bintray.com/spark-packages/maven,false;",
- "description": "",
- "type": "string"
- },
- "zeppelin.spark.sql.stacktrace": {
- "envName": "ZEPPELIN_SPARK_SQL_STACKTRACE",
- "propertyName": "zeppelin.spark.sql.stacktrace",
- "value": "false",
- "description": "",
- "type": "checkbox"
- },
- "zeppelin.spark.importImplicit":{
- "envName": "ZEPPELIN_SPARK_IMPORTIMPLICIT",
- "propertyName": "zeppelin.spark.importImplicit",
- "value": "true",
- "description": "",
- "type": "checkbox"
- },
- "zeppelin.spark.concurrentSQL": {
- "envName": "ZEPPELIN_SPARK_CONCURRENTSQL",
- "propertyName": "zeppelin.spark.concurrentSQL",
- "value": "false",
- "description": "",
- "type": "checkbox"
- },
- "zeppelin.spark.useHiveContext": {
- "envName": "ZEPPELIN_SPARK_USEHIVECONTEXT",
- "propertyName": "zeppelin.spark.useHiveContext",
- "value": "true",
- "description": "Use HiveContext instead of SQLContext if it is
true.",
- "type": "checkbox"
- },
- "zeppelin.pyspark.python": {
- "envName": "ZEPPELIN_PYSPARK_PYTHON",
- "propertyName": "zeppelin.pyspark.python",
- "value": "python",
- "description": "",
- "type": "string"
- },
- "zeppelin.dep.localrepo": {
- "envName": "ZEPPELIN_DEP_LOCALREPO",
- "propertyName": "zeppelin.dep.localrepo",
- "value": "local-repo",
- "description": "",
- "type": "string"
- },
- "zeppelin.spark.maxResult": {
- "envName": "ZEPPELIN_SPARK_MAXRESULT",
- "propertyName": "zeppelin.spark.maxResult",
- "value": "1000",
- "description": "Max number of Spark SQL result to display.",
- "type": "number"
- },
- "master":{
- "envName": "Master",
- "propertyName": "spark.master",
- "value": "local[*]",
- "description": "Spark master uri. ex) spark://masterhost:7077",
- "type": "string"
- },
- "spark.app.name": {
- "envName": "SPARK_APP_NAME",
- "propertyName": "spark.app.name",
- "value": "Zeppelin",
- "description": "The name of spark application.",
- "type": "string"
- },
- "spark.driver.memory": {
- "envName": "MEMORY_DRIVER",
- "propertyName": "spark.driver.memory",
- "value": "DRIVER_MEMORY",
- "description": "",
- "type": "string"
- }
- },
- "interpreterGroup": [
- {
- "class": "org.apache.zeppelin.spark.SparkInterpreter",
- "name": "spark"
- },
- {
- "class": "org.apache.zeppelin.spark.PySparkInterpreter",
- "name": "pyspark"
- }
- ],
- "dependencies": [],
- "option": {
- "remote": true,
- "perNoteSession": false,
- "perNoteProcess": false,
- "isExistingProcess": false
- }
- },
"2C6RJRBD2": {
"id": "2C6RJRBD2",
"name": "local_interpreter_python3",
@@ -201,6 +94,13 @@
"value": "DRIVER_MEMORY",
"description": "",
"type": "string"
+ },
+ "spark.pyspark.python": {
+ "envName": "PYSPARK_PYTHON",
+ "propertyName": "spark.pyspark.python",
+ "value": "PYTHON_VENV_PATH",
+ "description": "",
+ "type": "string"
}
},
"interpreterGroup": [
diff --git
a/infrastructure-provisioning/src/zeppelin/scripts/configure_zeppelin_node.py
b/infrastructure-provisioning/src/zeppelin/scripts/configure_zeppelin_node.py
index 33a3c8b..fc4202e 100644
---
a/infrastructure-provisioning/src/zeppelin/scripts/configure_zeppelin_node.py
+++
b/infrastructure-provisioning/src/zeppelin/scripts/configure_zeppelin_node.py
@@ -58,6 +58,8 @@ spark_version = args.spark_version
hadoop_version = args.hadoop_version
scala_link = "https://www.scala-lang.org/files/archive/"
zeppelin_version = args.zeppelin_version
+python_venv_version = os.environ['notebook_python_venv_version']
+python_venv_path =
'/opt/python/python{0}/bin/python{1}'.format(python_venv_version,
python_venv_version[:3])
zeppelin_link = "https://archive.apache.org/dist/zeppelin/zeppelin-" +
zeppelin_version + "/zeppelin-" + \
zeppelin_version + "-bin-netinst.tgz"
if args.region == 'cn-north-1':
@@ -161,13 +163,14 @@ def configure_local_livy_kernels(args):
conn.sudo("systemctl start zeppelin-notebook")
-def configure_local_spark_kernels(args):
+def configure_local_spark_kernels(args, python_venv_path):
if not exists(conn,'/home/' + args.os_user +
'/.ensure_dir/local_spark_kernel_ensured'):
conn.put(templates_dir + 'interpreter_spark.json',
'/tmp/interpreter.json')
conn.sudo('sed -i "s|ENDPOINTURL|' + args.endpoint_url + '|g"
/tmp/interpreter.json')
conn.sudo('sed -i "s|OS_USER|' + args.os_user + '|g"
/tmp/interpreter.json')
spark_memory = get_spark_memory()
conn.sudo('sed -i "s|DRIVER_MEMORY|{}m|g"
/tmp/interpreter.json'.format(spark_memory))
+ conn.sudo('sed -i "s|PYTHON_VENV_PATH|{}|g"
/tmp/interpreter.json'.format(python_venv_path))
update_zeppelin_interpreters(args.multiple_clusters, r_enabled,
'local')
conn.sudo('cp -f /tmp/interpreter.json
/opt/zeppelin/conf/interpreter.json')
conn.sudo('chown ' + args.os_user + ':' + args.os_user + ' -R
/opt/zeppelin/')
@@ -226,8 +229,12 @@ if __name__ == "__main__":
ensure_r(args.os_user, r_libs, args.region, args.r_mirror)
print("Install Python 3 modules")
ensure_python3_libraries(args.os_user)
- print("Install Python 3 specific version")
- ensure_python3_specific_version(python3_version, args.os_user)
+
+ # INSTALL PYTHON IN VIRTUALENV
+ print("Configure Python Virtualenv")
+ ensure_python_venv(python_venv_version)
+ #print("Install Python 3 specific version")
+ #ensure_python3_specific_version(python3_version, args.os_user)
# INSTALL SPARK AND CLOUD STORAGE JARS FOR SPARK
print("Install local Spark")
@@ -249,7 +256,7 @@ if __name__ == "__main__":
configure_local_livy_kernels(args)
else:
print("Configuring local kernels")
- configure_local_spark_kernels(args)
+ configure_local_spark_kernels(args, python_venv_path)
# INSTALL UNGIT
print("Install nodejs")
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]