This is an automated email from the ASF dual-hosted git repository.
lfrolov pushed a commit to branch develop
in repository https://gitbox.apache.org/repos/asf/incubator-datalab.git
The following commit(s) were added to refs/heads/develop by this push:
new 8b3e86b [DATALAB-2542]: Custom Zeppelin
new 1fd62ea Merge pull request #1321 from owlleg6/develop
8b3e86b is described below
commit 8b3e86b6c1a30befab999ebf56d5bd65969bea55
Author: oleh_mykolaishyn <[email protected]>
AuthorDate: Thu Nov 4 10:46:45 2021 +0200
[DATALAB-2542]: Custom Zeppelin
---
.../scripts/deploy_repository/deploy_repository.py | 2 +-
.../src/general/conf/datalab.ini | 2 +-
.../src/general/lib/os/fab.py | 6 +-
.../general/templates/azure/interpreter_spark.json | 926 ++++++++++++++++-----
.../zeppelin/scripts/configure_zeppelin_node.py | 27 +-
5 files changed, 718 insertions(+), 245 deletions(-)
diff --git a/infrastructure-provisioning/scripts/deploy_repository/deploy_repository.py b/infrastructure-provisioning/scripts/deploy_repository/deploy_repository.py
index 54a5435..67f648c 100644
--- a/infrastructure-provisioning/scripts/deploy_repository/deploy_repository.py
+++ b/infrastructure-provisioning/scripts/deploy_repository/deploy_repository.py
@@ -1215,7 +1215,7 @@ def download_packages():
'https://cntk.ai/PythonWheel/GPU/cntk-{}-cp35-cp35m-linux_x86_64.whl'.format(
configuration['notebook_cntk_version']),
'https://www.python.org/ftp/python/{0}/Python-{0}.tgz'.format(python3_version),
- 'http://archive.apache.org/dist/zeppelin/zeppelin-{0}/zeppelin-{0}-bin-netinst.tgz'.format(
+ 'https://nexus.develop.dlabanalytics.com/repository/packages-public/zeppelin-{}-prebuilt.tar.gz'.format(
configuration['notebook_zeppelin_version']),
'http://archive.cloudera.com/beta/livy/livy-server-{}.zip'.format(
configuration['notebook_livy_version']),
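The hunk above swaps the Apache archive netinst tarball for a prebuilt bundle served from the project's Nexus. As a rough standalone illustration of how that template resolves (the `configuration` dict and the `notebook_zeppelin_version` key come from deploy_repository.py; the sample value is the one set in datalab.ini below):

    # Hypothetical sketch; not part of the commit.
    configuration = {'notebook_zeppelin_version': '0.9.1'}
    url = ('https://nexus.develop.dlabanalytics.com/repository/packages-public/'
           'zeppelin-{}-prebuilt.tar.gz'.format(configuration['notebook_zeppelin_version']))
    print(url)
    # -> https://nexus.develop.dlabanalytics.com/repository/packages-public/zeppelin-0.9.1-prebuilt.tar.gz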
diff --git a/infrastructure-provisioning/src/general/conf/datalab.ini b/infrastructure-provisioning/src/general/conf/datalab.ini
index c6b1ac4..701ef97 100644
--- a/infrastructure-provisioning/src/general/conf/datalab.ini
+++ b/infrastructure-provisioning/src/general/conf/datalab.ini
@@ -287,7 +287,7 @@ python_venv_version = 3.7.9
### Version of TensorFlow to be installed on notebook
tensorflow_version = 2.5.0
### Version of Zeppelin to be installed on notebook
-zeppelin_version = 0.9.0
+zeppelin_version = 0.9.1
### Version of Rstudio to be installed on notebook
rstudio_version = 1.4.1103
### Version of Scala to be installed on notebook
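The version bump above is what ultimately lands in the `{}` placeholder of the Nexus URL. A minimal sketch of reading the setting with configparser, assuming the key sits in a `[notebook]` section (the section name is an assumption; only the key and its value appear in this hunk):

    # Sketch under the stated assumption about the section name.
    import configparser

    config = configparser.ConfigParser(interpolation=None)  # avoid choking on literal '%'
    config.read('infrastructure-provisioning/src/general/conf/datalab.ini')
    print(config.get('notebook', 'zeppelin_version'))  # 0.9.1 after this change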
diff --git a/infrastructure-provisioning/src/general/lib/os/fab.py b/infrastructure-provisioning/src/general/lib/os/fab.py
index 0ba17b5..c2e9833 100644
--- a/infrastructure-provisioning/src/general/lib/os/fab.py
+++ b/infrastructure-provisioning/src/general/lib/os/fab.py
@@ -1157,16 +1157,20 @@ def install_r_packages(os_user):
conn.sudo('R -e "install.packages(\'devtools\', repos = \'https://cloud.r-project.org\')"')
conn.sudo('R -e "install.packages(\'knitr\', repos = \'https://cloud.r-project.org\')"')
conn.sudo('R -e "install.packages(\'ggplot2\', repos = \'https://cloud.r-project.org\')"')
+ conn.sudo('R -e "install.packages(\'markdown\', repos = \'https://cloud.r-project.org\')"')
conn.sudo('R -e "install.packages(c(\'devtools\',\'mplot\', \'googleVis\'), '
'repos = \'https://cloud.r-project.org\'); require(devtools); install_github(\'ramnathv/rCharts\')"')
conn.sudo('R -e \'install.packages("versions", repos="https://cloud.r-project.org", dep=TRUE)\'')
conn.sudo('touch /home/' + os_user + '/.ensure_dir/r_packages_ensured')
+ conn.sudo("systemctl stop zeppelin-notebook")
+ conn.sudo("systemctl daemon-reload")
+ conn.sudo("systemctl enable zeppelin-notebook")
+ conn.sudo("systemctl start zeppelin-notebook")
except Exception as err:
logging.error('Function install_r_packages error:', str(err))
traceback.print_exc()
sys.exit(1)
-
def update_zeppelin_interpreters(multiple_clusters, r_enabled, interpreter_mode='remote'):
try:
interpreters_config = '/opt/zeppelin/conf/interpreter.json'
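The four systemctl calls added above reappear, split across new and existing lines, in configure_zeppelin_node.py further down. A small helper along these lines (not in the commit; `conn` is the Fabric connection used throughout fab.py) would capture the sequence in one place:

    # Sketch only: re-register and restart the Zeppelin systemd unit.
    def restart_zeppelin_service(conn, unit='zeppelin-notebook'):
        conn.sudo('systemctl stop {}'.format(unit))
        conn.sudo('systemctl daemon-reload')   # pick up unit-file changes
        conn.sudo('systemctl enable {}'.format(unit))
        conn.sudo('systemctl start {}'.format(unit))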
diff --git a/infrastructure-provisioning/src/general/templates/azure/interpreter_spark.json b/infrastructure-provisioning/src/general/templates/azure/interpreter_spark.json
index 56eb105..f10d445 100644
--- a/infrastructure-provisioning/src/general/templates/azure/interpreter_spark.json
+++ b/infrastructure-provisioning/src/general/templates/azure/interpreter_spark.json
@@ -1,249 +1,625 @@
{
"interpreterSettings": {
- "2C6RJRBD1": {
- "id": "2C6RJRBD1",
- "name": "local_interpreter_python2",
- "group": "spark",
+ "python": {
+ "id": "python",
+ "name": "python",
+ "group": "python",
"properties": {
- "zeppelin.spark.printREPLOutput": {
- "propertyName": "zeppelin.spark.printREPLOutput",
- "value": "true",
- "description": "Print REPL output",
- "type": "checkbox"
- },
- "zeppelin.dep.additionalRemoteRepository": {
- "envName": "ZEPPELIN_DEP_ADDITIONALREMOTEREPOSITORY",
- "propertyName": "zeppelin.dep.additionalRemoteRepository",
- "value": "spark-packages,http://dl.bintray.com/spark-packages/maven,false;",
- "description": "",
- "type": "string"
- },
- "zeppelin.spark.sql.stacktrace": {
- "envName": "ZEPPELIN_SPARK_SQL_STACKTRACE",
- "propertyName": "zeppelin.spark.sql.stacktrace",
- "value": "false",
- "description": "",
- "type": "checkbox"
- },
- "zeppelin.spark.importImplicit":{
- "envName": "ZEPPELIN_SPARK_IMPORTIMPLICIT",
- "propertyName": "zeppelin.spark.importImplicit",
- "value": "true",
- "description": "",
- "type": "checkbox"
- },
- "zeppelin.spark.concurrentSQL": {
- "envName": "ZEPPELIN_SPARK_CONCURRENTSQL",
- "propertyName": "zeppelin.spark.concurrentSQL",
- "value": "false",
- "description": "",
- "type": "checkbox"
- },
- "zeppelin.spark.useHiveContext": {
- "envName": "ZEPPELIN_SPARK_USEHIVECONTEXT",
- "propertyName": "zeppelin.spark.useHiveContext",
- "value": "true",
- "description": "Use HiveContext instead of SQLContext if it is true.",
- "type": "checkbox"
- },
- "zeppelin.pyspark.python": {
- "envName": "ZEPPELIN_PYSPARK_PYTHON",
- "propertyName": "zeppelin.pyspark.python",
- "value": "python",
- "description": "",
- "type": "string"
- },
- "zeppelin.dep.localrepo": {
- "envName": "ZEPPELIN_DEP_LOCALREPO",
- "propertyName": "zeppelin.dep.localrepo",
- "value": "local-repo",
- "description": "",
- "type": "string"
- },
- "zeppelin.spark.maxResult": {
- "envName": "ZEPPELIN_SPARK_MAXRESULT",
- "propertyName": "zeppelin.spark.maxResult",
- "value": "1000",
- "description": "Max number of Spark SQL result to display.",
- "type": "number"
- },
- "master":{
- "envName": "Master",
- "propertyName": "spark.master",
- "value": "local[*]",
- "description": "Spark master uri. ex) spark://masterhost:7077",
- "type": "string"
- },
- "spark.app.name": {
- "envName": "SPARK_APP_NAME",
- "propertyName": "spark.app.name",
- "value": "Zeppelin",
- "description": "The name of spark application.",
- "type": "string"
- },
- "spark.hadoop.fs.s3a.endpoint": {
- "envName": "SPARK_HADOOP_FS_S3A_ENDPOINT",
- "propertyName": "spark.hadoop.fs.s3a.endpoint",
- "value": "ENDPOINTURL",
- "description": "",
- "type": "string"
- },
- "spark.driver.memory": {
- "envName": "MEMORY_DRIVER",
- "propertyName": "spark.driver.memory",
- "value": "DRIVER_MEMORY",
- "description": "",
- "type": "string"
- }
+ "zeppelin.python": {
+ "name": "zeppelin.python",
+ "value": "PYTHON_VENV_PATH",
+ "type": "string",
+ "description": "Python binary executable path. It is set to python by default.(assume python is in your $PATH)"
+ },
+ "zeppelin.python.maxResult": {
+ "name": "zeppelin.python.maxResult",
+ "value": "1000",
+ "type": "number",
+ "description": "Max number of dataframe rows to display."
+ },
+ "zeppelin.python.useIPython": {
+ "name": "zeppelin.python.useIPython",
+ "value": true,
+ "type": "checkbox",
+ "description": "Whether use IPython when it is available in `%python`"
+ },
+ "zeppelin.ipython.launch.timeout": {
+ "name": "zeppelin.ipython.launch.timeout",
+ "value": "30000",
+ "type": "number",
+ "description": "Time out for ipython launch"
+ },
+ "zeppelin.ipython.grpc.message_size": {
+ "name": "zeppelin.ipython.grpc.message_size",
+ "value": "33554432",
+ "type": "number",
+ "description": "grpc message size, default is 32M"
+ }
},
+ "status": "READY",
"interpreterGroup": [
{
- "class": "org.apache.zeppelin.spark.SparkInterpreter",
- "name": "spark"
+ "name": "python",
+ "class": "org.apache.zeppelin.python.PythonInterpreter",
+ "defaultInterpreter": true,
+ "editor": {
+ "language": "python",
+ "editOnDblClick": false,
+ "completionSupport": true
+ }
},
{
- "class": "org.apache.zeppelin.spark.PySparkInterpreter",
- "name": "pyspark"
+ "name": "ipython",
+ "class": "org.apache.zeppelin.python.IPythonInterpreter",
+ "defaultInterpreter": false,
+ "editor": {
+ "language": "python",
+ "editOnDblClick": false,
+ "completionKey": "TAB",
+ "completionSupport": true
+ }
+ },
+ {
+ "name": "sql",
+ "class": "org.apache.zeppelin.python.PythonInterpreterPandasSql",
+ "defaultInterpreter": false,
+ "editor": {
+ "language": "sql",
+ "editOnDblClick": false,
+ "completionKey": "TAB",
+ "completionSupport": false
+ }
}
],
"dependencies": [],
"option": {
"remote": true,
- "perNoteSession": false,
- "perNoteProcess": false,
- "isExistingProcess": false
+ "port": -1,
+ "perNote": "shared",
+ "perUser": "shared",
+ "isExistingProcess": false,
+ "setPermission": false,
+ "owners": [],
+ "isUserImpersonate": false
}
},
- "2C6RJRBD2": {
- "id": "2C6RJRBD2",
- "name": "local_interpreter_python3",
- "group": "spark",
+ "livy": {
+ "id": "livy",
+ "name": "livy",
+ "group": "livy",
"properties": {
- "zeppelin.spark.printREPLOutput": {
- "propertyName": "zeppelin.spark.printREPLOutput",
- "value": "true",
- "description": "Print REPL output",
- "type": "checkbox"
- },
- "zeppelin.dep.additionalRemoteRepository": {
- "envName": "ZEPPELIN_DEP_ADDITIONALREMOTEREPOSITORY",
- "propertyName": "zeppelin.dep.additionalRemoteRepository",
- "value": "spark-packages,http://dl.bintray.com/spark-packages/maven,false;",
- "description": "",
- "type": "string"
- },
- "zeppelin.spark.sql.stacktrace": {
- "envName": "ZEPPELIN_SPARK_SQL_STACKTRACE",
- "propertyName": "zeppelin.spark.sql.stacktrace",
- "value": "false",
- "description": "",
- "type": "checkbox"
- },
- "zeppelin.spark.importImplicit":{
- "envName": "ZEPPELIN_SPARK_IMPORTIMPLICIT",
- "propertyName": "zeppelin.spark.importImplicit",
- "value": "true",
- "description": "",
- "type": "checkbox"
- },
- "zeppelin.spark.concurrentSQL": {
- "envName": "ZEPPELIN_SPARK_CONCURRENTSQL",
- "propertyName": "zeppelin.spark.concurrentSQL",
- "value": "false",
- "description": "",
- "type": "checkbox"
- },
- "zeppelin.spark.useHiveContext": {
- "envName": "ZEPPELIN_SPARK_USEHIVECONTEXT",
- "propertyName": "zeppelin.spark.useHiveContext",
- "value": "true",
- "description": "Use HiveContext instead of SQLContext if it is true.",
- "type": "checkbox"
- },
- "zeppelin.pyspark.python": {
- "envName": "ZEPPELIN_PYSPARK_PYTHON",
- "propertyName": "zeppelin.pyspark.python",
- "value": "python3.5",
- "description": "",
- "type": "string"
- },
- "zeppelin.dep.localrepo": {
- "envName": "ZEPPELIN_DEP_LOCALREPO",
- "propertyName": "zeppelin.dep.localrepo",
- "value": "local-repo",
- "description": "",
- "type": "string"
- },
- "zeppelin.spark.maxResult": {
- "envName": "ZEPPELIN_SPARK_MAXRESULT",
- "propertyName": "zeppelin.spark.maxResult",
- "value": "1000",
- "description": "Max number of Spark SQL result to display.",
- "type": "number"
- },
- "master":{
- "envName": "Master",
- "propertyName": "spark.master",
- "value": "local[*]",
- "description": "Spark master uri. ex) spark://masterhost:7077",
- "type": "string"
- },
- "spark.app.name": {
- "envName": "SPARK_APP_NAME",
- "propertyName": "spark.app.name",
- "value": "Zeppelin",
- "description": "The name of spark application.",
- "type": "string"
- },
- "spark.hadoop.fs.s3a.endpoint": {
- "envName": "SPARK_HADOOP_FS_S3A_ENDPOINT",
- "propertyName": "spark.hadoop.fs.s3a.endpoint",
- "value": "ENDPOINTURL",
- "description": "",
- "type": "string"
- },
- "spark.driver.memory": {
- "envName": "MEMORY_DRIVER",
- "propertyName": "spark.driver.memory",
- "value": "DRIVER_MEMORY",
- "description": "",
- "type": "string"
- }
+ "zeppelin.livy.url": {
+ "name": "zeppelin.livy.url",
+ "value": "http://localhost:8998",
+ "type": "url",
+ "description": "The URL for Livy Server."
+ },
+ "zeppelin.livy.session.create_timeout": {
+ "name": "zeppelin.livy.session.create_timeout",
+ "value": "120",
+ "type": "number",
+ "description": "Livy Server create session timeout (seconds)."
+ },
+ "livy.spark.driver.cores": {
+ "name": "livy.spark.driver.cores",
+ "value": "",
+ "type": "number",
+ "description": "Driver cores. ex) 1, 2"
+ },
+ "livy.spark.driver.memory": {
+ "name": "livy.spark.driver.memory",
+ "value": "1g",
+ "type": "string",
+ "description": "Driver memory. ex) 512m, 32g"
+ },
+ "livy.spark.executor.instances": {
+ "name": "livy.spark.executor.instances",
+ "value": "",
+ "type": "number",
+ "description": "Executor instances. ex) 1, 4"
+ },
+ "livy.spark.executor.cores": {
+ "name": "livy.spark.executor.cores",
+ "value": "",
+ "type": "number",
+ "description": "Num cores per executor. ex) 1, 4"
+ },
+ "livy.spark.executor.memory": {
+ "name": "livy.spark.executor.memory",
+ "value": "",
+ "type": "string",
+ "description": "Executor memory per worker instance. ex) 512m, 32g"
+ },
+ "livy.spark.dynamicAllocation.enabled": {
+ "name": "livy.spark.dynamicAllocation.enabled",
+ "value": false,
+ "type": "checkbox",
+ "description": "Use dynamic resource allocation"
+ },
+ "livy.spark.dynamicAllocation.cachedExecutorIdleTimeout": {
+ "name": "livy.spark.dynamicAllocation.cachedExecutorIdleTimeout",
+ "value": "",
+ "type": "string",
+ "description": "Remove an executor which has cached data blocks"
+ },
+ "livy.spark.dynamicAllocation.minExecutors": {
+ "name": "livy.spark.dynamicAllocation.minExecutors",
+ "value": "",
+ "type": "number",
+ "description": "Lower bound for the number of executors if dynamic allocation is enabled."
+ },
+ "livy.spark.dynamicAllocation.initialExecutors": {
+ "name": "livy.spark.dynamicAllocation.initialExecutors",
+ "value": "",
+ "type": "number",
+ "description": "Initial number of executors to run if dynamic allocation is enabled."
+ },
+ "livy.spark.dynamicAllocation.maxExecutors": {
+ "name": "livy.spark.dynamicAllocation.maxExecutors",
+ "value": "",
+ "type": "number",
+ "description": "Upper bound for the number of executors if dynamic allocation is enabled."
+ },
+ "zeppelin.livy.principal": {
+ "name": "zeppelin.livy.principal",
+ "value": "",
+ "type": "string",
+ "description": "Kerberos principal to authenticate livy"
+ },
+ "zeppelin.livy.keytab": {
+ "name": "zeppelin.livy.keytab",
+ "value": "",
+ "type": "textarea",
+ "description": "Kerberos keytab to authenticate livy"
+ },
+ "zeppelin.livy.pull_status.interval.millis": {
+ "name": "zeppelin.livy.pull_status.interval.millis",
+ "value": "1000",
+ "type": "number",
+ "description": "The interval for checking paragraph execution status"
+ },
+ "zeppelin.livy.maxLogLines": {
+ "name": "zeppelin.livy.maxLogLines",
+ "value": "1000",
+ "type": "number",
+ "description": "Max number of lines of logs"
+ },
+ "livy.spark.jars.packages": {
+ "name": "livy.spark.jars.packages",
+ "value": "",
+ "type": "textarea",
+ "description": "Adding extra libraries to livy interpreter"
+ },
+ "zeppelin.livy.displayAppInfo": {
+ "name": "zeppelin.livy.displayAppInfo",
+ "value": true,
+ "type": "checkbox",
+ "description": "Whether display app info"
+ },
+ "zeppelin.livy.restart_dead_session": {
+ "name": "zeppelin.livy.restart_dead_session",
+ "value": false,
+ "type": "checkbox",
+ "description": "Whether restart a dead session"
+ },
+ "zeppelin.livy.spark.sql.maxResult": {
+ "name": "zeppelin.livy.spark.sql.maxResult",
+ "value": "1000",
+ "type": "number",
+ "description": "Max number of Spark SQL result to display."
+ },
+ "zeppelin.livy.spark.sql.field.truncate": {
+ "name": "zeppelin.livy.spark.sql.field.truncate",
+ "value": true,
+ "type": "checkbox",
+ "description": "If true, truncate field values longer than 20 characters."
+ },
+ "zeppelin.livy.concurrentSQL": {
+ "name": "zeppelin.livy.concurrentSQL",
+ "value": false,
+ "type": "checkbox",
+ "description": "Execute multiple SQL concurrently if set true."
+ }
},
+ "status": "READY",
"interpreterGroup": [
{
- "class": "org.apache.zeppelin.spark.SparkInterpreter",
- "name": "spark"
+ "name": "spark",
+ "class": "org.apache.zeppelin.livy.LivySparkInterpreter",
+ "defaultInterpreter": true,
+ "editor": {
+ "language": "scala",
+ "editOnDblClick": false,
+ "completionKey": "TAB",
+ "completionSupport": true
+ }
},
{
- "class": "org.apache.zeppelin.spark.PySparkInterpreter",
- "name": "pyspark"
+ "name": "sql",
+ "class": "org.apache.zeppelin.livy.LivySparkSQLInterpreter",
+ "defaultInterpreter": false,
+ "editor": {
+ "language": "sql",
+ "editOnDblClick": false,
+ "completionKey": "TAB",
+ "completionSupport": true
+ }
+ },
+ {
+ "name": "pyspark",
+ "class": "org.apache.zeppelin.livy.LivyPySparkInterpreter",
+ "defaultInterpreter": false,
+ "editor": {
+ "language": "python",
+ "editOnDblClick": false,
+ "completionKey": "TAB",
+ "completionSupport": true
+ }
+ },
+ {
+ "name": "pyspark3",
+ "class": "org.apache.zeppelin.livy.LivyPySpark3Interpreter",
+ "defaultInterpreter": false,
+ "editor": {
+ "language": "python",
+ "editOnDblClick": false,
+ "completionKey": "TAB",
+ "completionSupport": true
+ }
+ },
+ {
+ "name": "sparkr",
+ "class": "org.apache.zeppelin.livy.LivySparkRInterpreter",
+ "defaultInterpreter": false,
+ "editor": {
+ "language": "r",
+ "editOnDblClick": false,
+ "completionKey": "TAB",
+ "completionSupport": true
+ }
+ },
+ {
+ "name": "shared",
+ "class": "org.apache.zeppelin.livy.LivySharedInterpreter",
+ "defaultInterpreter": false
}
],
"dependencies": [],
"option": {
"remote": true,
- "perNoteSession": false,
- "perNoteProcess": false,
- "isExistingProcess": false
+ "port": -1,
+ "perNote": "shared",
+ "perUser": "scoped",
+ "isExistingProcess": false,
+ "setPermission": false,
+ "owners": [],
+ "isUserImpersonate": false
}
},
- "2D89JUYA9": {
- "id": "2D89JUYA9",
- "name": "md",
- "group": "md",
+ "spark-submit": {
+ "id": "spark-submit",
+ "name": "spark-submit",
+ "group": "spark-submit",
"properties": {
- "markdown.parser.type": "pegdown"
+ "SPARK_HOME": {
+ "name": "SPARK_HOME",
+ "value": "/opt/spark",
+ "type": "string",
+ "description": "Location of spark distribution"
+ }
},
"status": "READY",
"interpreterGroup": [
{
- "name": "md",
- "class": "org.apache.zeppelin.markdown.Markdown",
+ "name": "submit",
+ "class": "org.apache.zeppelin.spark.submit.SparkSubmitInterpreter",
+ "defaultInterpreter": true,
+ "editor": {
+ "language": "sh",
+ "editOnDblClick": false,
+ "completionSupport": false
+ }
+ }
+ ],
+ "dependencies": [],
+ "option": {
+ "remote": true,
+ "port": -1,
+ "isExistingProcess": false,
+ "setPermission": false,
+ "owners": [],
+ "isUserImpersonate": false
+ }
+ },
+ "spark": {
+ "id": "spark",
+ "name": "spark",
+ "group": "spark",
+ "properties": {
+ "SPARK_HOME": {
+ "name": "SPARK_HOME",
+ "value": "/opt/spark",
+ "type": "string",
+ "description": "Location of spark distribution"
+ },
+ "spark.master": {
+ "name": "spark.master",
+ "value": "local[*]",
+ "type": "string",
+ "description": "Spark master uri. local | yarn-client | yarn-cluster | spark master address of standalone mode, ex) spark://master_host:7077"
+ },
+ "spark.submit.deployMode": {
+ "name": "spark.submit.deployMode",
+ "value": "",
+ "type": "string",
+ "description": "The deploy mode of Spark driver program, either \"client\" or \"cluster\", Which means to launch driver program locally (\"client\") or remotely (\"cluster\") on one of the nodes inside the cluster."
+ },
+ "spark.app.name": {
+ "name": "spark.app.name",
+ "value": "",
+ "type": "string",
+ "description": "The name of spark application."
+ },
+ "spark.driver.cores": {
+ "name": "spark.driver.cores",
+ "value": "1",
+ "type": "number",
+ "description": "Number of cores to use for the driver process, only in cluster mode."
+ },
+ "spark.driver.memory": {
+ "name": "spark.driver.memory",
+ "value": "DRIVER_MEMORY",
+ "type": "string",
+ "description": "Amount of memory to use for the driver process, i.e. where SparkContext is initialized, in the same format as JVM memory strings with a size unit suffix (\"k\", \"m\", \"g\" or \"t\") (e.g. 512m, 2g)."
+ },
+ "spark.executor.cores": {
+ "name": "spark.executor.cores",
+ "value": "1",
+ "type": "number",
+ "description": "The number of cores to use on each executor"
+ },
+ "spark.executor.memory": {
+ "name": "spark.executor.memory",
+ "value": "1g",
+ "type": "string",
+ "description": "Executor memory per worker instance. ex) 512m, 32g"
+ },
+ "spark.executor.instances": {
+ "name": "spark.executor.instances",
+ "value": "2",
+ "type": "number",
+ "description": "The number of executors for static allocation."
+ },
+ "spark.files": {
+ "name": "spark.files",
+ "value": "",
+ "type": "string",
+ "description": "Comma-separated list of files to be placed in the working directory of each executor. Globs are allowed."
+ },
+ "spark.jars": {
+ "name": "spark.jars",
+ "value": "",
+ "type": "string",
+ "description": "Comma-separated list of jars to include on the driver and executor classpaths. Globs are allowed."
+ },
+ "spark.jars.packages": {
+ "name": "spark.jars.packages",
+ "value": "",
+ "type": "string",
+ "description": "Comma-separated list of Maven coordinates of jars to include on the driver and executor classpaths. The coordinates should be groupId:artifactId:version. If spark.jars.ivySettings is given artifacts will be resolved according to the configuration in the file, otherwise artifacts will be searched for in the local maven repo, then maven central and finally any additional remote repositories given by the command-line option --repositories."
+ },
+ "zeppelin.spark.useHiveContext": {
+ "name": "zeppelin.spark.useHiveContext",
+ "value": true,
+ "type": "checkbox",
+ "description": "Use HiveContext instead of SQLContext if it is true. Enable hive for SparkSession."
+ },
+ "zeppelin.spark.run.asLoginUser": {
+ "name": "zeppelin.spark.run.asLoginUser",
+ "value": true,
+ "type": "checkbox",
+ "description": "Whether run spark job as the zeppelin login user, it is only applied when running spark job in hadoop yarn cluster and shiro is enabled"
+ },
+ "zeppelin.spark.printREPLOutput": {
+ "name": "zeppelin.spark.printREPLOutput",
+ "value": true,
+ "type": "checkbox",
+ "description": "Print REPL output"
+ },
+ "zeppelin.spark.maxResult": {
+ "name": "zeppelin.spark.maxResult",
+ "value": "1000",
+ "type": "number",
+ "description": "Max number of result to display."
+ },
+ "zeppelin.spark.enableSupportedVersionCheck": {
+ "name": "zeppelin.spark.enableSupportedVersionCheck",
+ "value": true,
+ "type": "checkbox",
+ "description": "Whether checking supported spark version. Developer only setting, not for production use"
+ },
+ "zeppelin.spark.uiWebUrl": {
+ "name": "zeppelin.spark.uiWebUrl",
+ "value": "",
+ "type": "string",
+ "description": "Override Spark UI default URL. In Kubernetes mode, value can be Jinja template string with 3 template variables \u0027PORT\u0027, \u0027SERVICE_NAME\u0027 and \u0027SERVICE_DOMAIN\u0027. (ex: http://{{PORT}}-{{SERVICE_NAME}}.{{SERVICE_DOMAIN}})"
+ },
+ "zeppelin.spark.ui.hidden": {
+ "name": "zeppelin.spark.ui.hidden",
+ "value": false,
+ "type": "checkbox",
+ "description": "Whether hide spark ui in zeppelin ui"
+ },
+ "spark.webui.yarn.useProxy": {
+ "name": "spark.webui.yarn.useProxy",
+ "value": false,
+ "type": "checkbox",
+ "description": "whether use yarn proxy url as spark weburl, e.g. http://localhost:8088/proxy/application_1583396598068_0004"
+ },
+ "zeppelin.spark.scala.color": {
+ "name": "zeppelin.spark.scala.color",
+ "value": true,
+ "type": "checkbox",
+ "description": "Whether enable color output of spark scala interpreter"
+ },
+ "zeppelin.spark.deprecatedMsg.show": {
+ "name": "zeppelin.spark.deprecatedMsg.show",
+ "value": true,
+ "type": "checkbox",
+ "description": "Whether show the spark deprecated message, spark 2.2 and before are deprecated. Zeppelin will display warning message by default"
+ },
+ "zeppelin.spark.concurrentSQL": {
+ "name": "zeppelin.spark.concurrentSQL",
+ "value": true,
+ "type": "checkbox",
+ "description": "Execute multiple SQL concurrently if set true."
+ },
+ "zeppelin.spark.concurrentSQL.max": {
+ "name": "zeppelin.spark.concurrentSQL.max",
+ "value": "10",
+ "type": "number",
+ "description": "Max number of SQL concurrently executed"
+ },
+ "zeppelin.spark.sql.stacktrace": {
+ "name": "zeppelin.spark.sql.stacktrace",
+ "value": true,
+ "type": "checkbox",
+ "description": "Show full exception stacktrace for SQL queries if set to true."
+ },
+ "zeppelin.spark.sql.interpolation": {
+ "name": "zeppelin.spark.sql.interpolation",
+ "value": false,
+ "type": "checkbox",
+ "description": "Enable ZeppelinContext variable interpolation into spark sql"
+ },
+ "PYSPARK_PYTHON": {
+ "name": "PYSPARK_PYTHON",
+ "value": "PYTHON_VENV_PATH",
+ "type": "string",
+ "description": "Python binary executable to use for PySpark in both driver and workers (default is python2.7 if available, otherwise python). Property `spark.pyspark.python` take precedence if it is set"
+ },
+ "PYSPARK_DRIVER_PYTHON": {
+ "name": "PYSPARK_DRIVER_PYTHON",
+ "value": "PYTHON_VENV_PATH",
+ "type": "string",
+ "description": "Python binary executable to use for PySpark in driver only (default is `PYSPARK_PYTHON`). Property `spark.pyspark.driver.python` take precedence if it is set"
+ },
+ "zeppelin.pyspark.useIPython": {
+ "name": "zeppelin.pyspark.useIPython",
+ "value": true,
+ "type": "checkbox",
+ "description": "Whether use IPython when it is available"
+ },
+ "zeppelin.R.knitr": {
+ "name": "zeppelin.R.knitr",
+ "value": true,
+ "type": "checkbox",
+ "description": "Whether use knitr or not"
+ },
+ "zeppelin.R.cmd": {
+ "name": "zeppelin.R.cmd",
+ "value": "R",
+ "type": "string",
+ "description": "R binary executable path"
+ },
+ "zeppelin.R.image.width": {
+ "name": "zeppelin.R.image.width",
+ "value": "100%",
+ "type": "number",
+ "description": "Image width of R plotting"
+ },
+ "zeppelin.R.render.options": {
+ "name": "zeppelin.R.render.options",
+ "value": "out.format \u003d \u0027html\u0027, comment \u003d NA, echo \u003d FALSE, results \u003d \u0027asis\u0027, message \u003d F, warning \u003d F, fig.retina \u003d 2",
+ "type": "textarea",
+ "description": ""
+ },
+ "zeppelin.R.shiny.portRange": {
+ "name": "zeppelin.R.shiny.portRange",
+ "value": ":",
+ "type": "string",
+ "description": "Shiny app would launch a web app at some port, this property is to specify the portRange via format \u0027\u003cstart\u003e:\u003cend\u003e\u0027, e.g. \u00275000:5001\u0027. By default it is \u0027:\u0027 which means any port"
+ },
+ "spark.hadoop.fs.s3a.endpoint": {
+ "envName": "SPARK_HADOOP_FS_S3A_ENDPOINT",
+ "propertyName": "spark.hadoop.fs.s3a.endpoint",
+ "value": "ENDPOINTURL",
+ "description": "",
+ "type": "string"
+ },
+ "zeppelin.kotlin.shortenTypes": {
+ "name": "zeppelin.kotlin.shortenTypes",
+ "value": true,
+ "type": "checkbox",
+ "description": "Show short types instead of full, e.g. List\u003cString\u003e or kotlin.collections.List\u003ckotlin.String\u003e"
+ }
+ },
+ "status": "READY",
+ "interpreterGroup": [
+ {
+ "name": "spark",
+ "class": "org.apache.zeppelin.spark.SparkInterpreter",
+ "defaultInterpreter": true,
+ "editor": {
+ "language": "scala",
+ "editOnDblClick": false,
+ "completionKey": "TAB",
+ "completionSupport": true
+ }
+ },
+ {
+ "name": "sql",
+ "class": "org.apache.zeppelin.spark.SparkSqlInterpreter",
"defaultInterpreter": false,
"editor": {
- "language": "markdown",
- "editOnDblClick": true
+ "language": "sql",
+ "editOnDblClick": false,
+ "completionKey": "TAB",
+ "completionSupport": true
+ }
+ },
+ {
+ "name": "pyspark",
+ "class": "org.apache.zeppelin.spark.PySparkInterpreter",
+ "defaultInterpreter": false,
+ "editor": {
+ "language": "python",
+ "editOnDblClick": false,
+ "completionKey": "TAB",
+ "completionSupport": true
+ }
+ },
+ {
+ "name": "ipyspark",
+ "class": "org.apache.zeppelin.spark.IPySparkInterpreter",
+ "defaultInterpreter": false,
+ "editor": {
+ "language": "python",
+ "editOnDblClick": false,
+ "completionSupport": true,
+ "completionKey": "TAB"
+ }
+ },
+ {
+ "name": "r",
+ "class": "org.apache.zeppelin.spark.SparkRInterpreter",
+ "defaultInterpreter": false,
+ "editor": {
+ "language": "r",
+ "editOnDblClick": false,
+ "completionSupport": false,
+ "completionKey": "TAB"
+ }
+ },
+ {
+ "name": "ir",
+ "class": "org.apache.zeppelin.spark.SparkIRInterpreter",
+ "defaultInterpreter": false,
+ "editor": {
+ "language": "r",
+ "editOnDblClick": false,
+ "completionSupport": true,
+ "completionKey": "TAB"
}
}
],
@@ -255,32 +631,123 @@
"perUser": "shared",
"isExistingProcess": false,
"setPermission": false,
- "users": [],
+ "owners": [],
"isUserImpersonate": false
}
},
- "2DG931872": {
- "id": "2DG931872",
+ "sh": {
+ "id": "sh",
"name": "sh",
"group": "sh",
"properties": {
- "zeppelin.interpreter.output.limit": "102400",
- "shell.working.directory.user.home": "false",
- "zeppelin.shell.keytab.location": "",
- "shell.command.timeout.millisecs": "60000",
- "zeppelin.interpreter.localRepo": "/opt/zeppelin/local-repo/2DG931872",
- "zeppelin.shell.principal": "",
- "zeppelin.shell.auth.type": ""
+ "shell.command.timeout.millisecs": {
+ "name": "shell.command.timeout.millisecs",
+ "value": "60000",
+ "type": "number",
+ "description": "Shell command time out in millisecs. Default \u003d 60000"
+ },
+ "shell.command.timeout.check.interval": {
+ "name": "shell.command.timeout.check.interval",
+ "value": "60000",
+ "type": "number",
+ "description": "Shell command output check interval in millisecs. Default \u003d 10000"
+ },
+ "zeppelin.interpreter.localRepo": "/opt/zeppelin/local-repo/sh",
+ "shell.working.directory.user.home": {
+ "name": "shell.working.directory.user.home",
+ "value": false,
+ "type": "checkbox",
+ "description": "If this set to true, the shell\u0027s working directory will be set to user home"
+ },
+ "zeppelin.shell.auth.type": {
+ "name": "zeppelin.shell.auth.type",
+ "value": "",
+ "type": "string",
+ "description": "If auth type is needed, Example: KERBEROS"
+ },
+ "zeppelin.shell.keytab.location": {
+ "name": "zeppelin.shell.keytab.location",
+ "value": "",
+ "type": "string",
+ "description": "Kerberos keytab location"
+ },
+ "zeppelin.shell.principal": {
+ "name": "zeppelin.shell.principal",
+ "value": "",
+ "type": "string",
+ "description": "Kerberos principal"
+ },
+ "zeppelin.shell.interpolation": {
+ "name": "zeppelin.shell.interpolation",
+ "value": false,
+ "type": "checkbox",
+ "description": "Enable ZeppelinContext variable interpolation into paragraph text"
+ },
+ "zeppelin.terminal.ip.mapping": {
+ "name": "zeppelin.terminal.ip.mapping",
+ "value": "",
+ "type": "string",
+ "description": "Internal and external IP mapping of zeppelin server"
+ }
},
"status": "READY",
"interpreterGroup": [
{
"name": "sh",
"class": "org.apache.zeppelin.shell.ShellInterpreter",
+ "defaultInterpreter": true,
+ "editor": {
+ "language": "sh",
+ "editOnDblClick": false,
+ "completionSupport": false
+ }
+ },
+ {
+ "name": "terminal",
+ "class": "org.apache.zeppelin.shell.TerminalInterpreter",
"defaultInterpreter": false,
"editor": {
"language": "sh",
- "editOnDblClick": false
+ "editOnDblClick": false,
+ "completionSupport": false
+ },
+ "config": {
+ "checkEmpty": false
+ }
+ }
+ ],
+ "dependencies": [],
+ "option": {
+ "remote": true,
+ "port": -1,
+ "isExistingProcess": false,
+ "setPermission": false,
+ "owners": [],
+ "isUserImpersonate": false
+ }
+ },
+ "md": {
+ "id": "md",
+ "name": "md",
+ "group": "md",
+ "properties": {
+ "markdown.parser.type": {
+ "name": "markdown.parser.type",
+ "value": "pegdown",
+ "type": "string",
+ "description": "Markdown Parser Type. Available values: pegdown, markdown4j, flexmark. Default \u003d flexmark"
+ }
+ },
+ "status": "READY",
+ "interpreterGroup": [
+ {
+ "name": "md",
+ "class": "org.apache.zeppelin.markdown.Markdown",
+ "defaultInterpreter": false,
+ "editor": {
+ "language": "markdown",
+ "editOnDblClick": true,
+ "completionSupport": false
}
}
],
@@ -292,21 +759,18 @@
"perUser": "shared",
"isExistingProcess": false,
"setPermission": false,
- "users": [],
+ "owners": [],
"isUserImpersonate": false
}
}
},
- "interpreterBindings": {
- "2C5NV42W8": [
- "2C3B8E6M6"
- ]
- },
"interpreterRepositories": [
{
"id": "central",
"type": "default",
- "url": "https://repo1.maven.org/maven2/",
+ "url": "https://repo1.maven.org/maven2",
+ "host": "repo1.maven.org",
+ "protocol": "https",
"releasePolicy": {
"enabled": true,
"updatePolicy": "daily",
@@ -341,6 +805,8 @@
"id": "local",
"type": "default",
"url": "file:///root/.m2/repository",
+ "host": "",
+ "protocol": "file",
"releasePolicy": {
"enabled": true,
"updatePolicy": "daily",
@@ -355,4 +821,4 @@
"repositoryManager": false
}
]
-}
\ No newline at end of file
+}
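The rewritten template keeps three placeholders, PYTHON_VENV_PATH, DRIVER_MEMORY and ENDPOINTURL, which configure_zeppelin_node.py below rewrites with sed before copying the file into /opt/zeppelin/conf. An equivalent in-Python sketch (the local file path and the sample values are illustrative, not from the commit):

    # Hypothetical rendering of the template; the deploy script does this with sed.
    with open('interpreter_spark.json') as f:
        template = f.read()
    rendered = (template
                .replace('PYTHON_VENV_PATH', '/opt/python/python3.7.9/bin/python3.7')
                .replace('DRIVER_MEMORY', '2048m')
                .replace('ENDPOINTURL', 'https://example-endpoint'))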
diff --git a/infrastructure-provisioning/src/zeppelin/scripts/configure_zeppelin_node.py b/infrastructure-provisioning/src/zeppelin/scripts/configure_zeppelin_node.py
index 896bc08..868b289 100644
--- a/infrastructure-provisioning/src/zeppelin/scripts/configure_zeppelin_node.py
+++ b/infrastructure-provisioning/src/zeppelin/scripts/configure_zeppelin_node.py
@@ -57,10 +57,9 @@ spark_version = args.spark_version
hadoop_version = args.hadoop_version
scala_link = "https://www.scala-lang.org/files/archive/"
zeppelin_version = args.zeppelin_version
+zeppelin_link = "https://nexus.develop.dlabanalytics.com/repository/packages-public/zeppelin-"+ zeppelin_version +"-prebuilt.tar.gz"
python_venv_version = os.environ['notebook_python_venv_version']
python_venv_path = '/opt/python/python{0}/bin/python{1}'.format(python_venv_version, python_venv_version[:3])
-zeppelin_link = "https://archive.apache.org/dist/zeppelin/zeppelin-" + zeppelin_version + "/zeppelin-" + \
- zeppelin_version + "-bin-netinst.tgz"
if args.region == 'cn-north-1':
spark_link = "http://mirrors.hust.edu.cn/apache/spark/spark-" + spark_version + "/spark-" + spark_version + \
"-bin-hadoop" + hadoop_version + ".tgz"
@@ -81,11 +80,12 @@ gitlab_certfile = os.environ['conf_gitlab_certfile']
def configure_zeppelin(os_user):
if not exists(conn,'/home/' + os_user + '/.ensure_dir/zeppelin_ensured'):
try:
- conn.sudo('wget ' + zeppelin_link + ' -O /tmp/zeppelin-' + zeppelin_version + '-bin-netinst.tgz')
- conn.sudo('tar -zxvf /tmp/zeppelin-' + zeppelin_version + '-bin-netinst.tgz -C /opt/')
- conn.sudo('ln -s /opt/zeppelin-' + zeppelin_version + '-bin-netinst /opt/zeppelin')
+ # test nexus
+ conn.sudo('wget ' + zeppelin_link + ' -O /tmp/zeppelin-' + zeppelin_version + '-prebuilt.tar.gz')
+ conn.sudo('tar -zxvf /tmp/zeppelin-' + zeppelin_version + '-prebuilt.tar.gz -C /opt/')
+ conn.sudo('ln -s /opt/zeppelin-' + zeppelin_version + '-prebuilt.tar.gz /opt/zeppelin')
conn.sudo('cp /opt/zeppelin/conf/zeppelin-env.sh.template /opt/zeppelin/conf/zeppelin-env.sh')
- java_home = conn.run("update-alternatives --query java | grep -o \'/.*/java-8.*/jre\'").stdout.splitlines()[0].replace('\n','')
+ java_home = conn.run("update-alternatives --query java | grep -o \'/.*/java-8.*/jre\'").stdout.splitlines()[0].replace('\n', '')
conn.sudo('''bash -c "echo 'export JAVA_HOME=\'{}\'' >> /opt/zeppelin/conf/zeppelin-env.sh" '''.format(java_home))
conn.sudo('cp /opt/zeppelin/conf/zeppelin-site.xml.template /opt/zeppelin/conf/zeppelin-site.xml')
conn.sudo('sed -i \"/# export ZEPPELIN_PID_DIR/c\export ZEPPELIN_PID_DIR=/var/run/zeppelin\" /opt/zeppelin/conf/zeppelin-env.sh')
@@ -95,15 +95,16 @@ def configure_zeppelin(os_user):
conn.sudo('sed -i \'s/127.0.0.1/0.0.0.0/g\' /opt/zeppelin/conf/zeppelin-site.xml')
conn.sudo('mkdir /var/log/zeppelin')
conn.sudo('mkdir /var/run/zeppelin')
- conn.sudo('ln -s /var/log/zeppelin /opt/zeppelin-' + zeppelin_version + '-bin-netinst/logs')
+ conn.sudo('ln -s /var/log/zeppelin /opt/zeppelin/logs')
conn.sudo('chown ' + os_user + ':' + os_user + ' -R /var/log/zeppelin')
- conn.sudo('ln -s /var/run/zeppelin /opt/zeppelin-' + zeppelin_version + '-bin-netinst/run')
+ conn.sudo('ln -s /var/run/zeppelin /opt/zeppelin/run')
conn.sudo('chown ' + os_user + ':' + os_user + ' -R /var/run/zeppelin')
conn.sudo('''bash -l -c '/opt/zeppelin/bin/install-interpreter.sh --name {} --proxy-url $http_proxy' '''.format(zeppelin_interpreters))
- conn.sudo('chown ' + os_user + ':' + os_user + ' -R /opt/zeppelin-' + zeppelin_version + '-bin-netinst')
+ conn.sudo('''bash -l -c '/opt/zeppelin/bin/install-interpreter.sh --name sh --artifact /opt/zeppelin/interpreter/sh/zeppelin-shell-*.jar --proxy-url $http_proxy' ''')
+ conn.sudo('chown ' + os_user + ':' + os_user + ' -R /opt/zeppelin')
conn.sudo('mkdir -p /opt/zeppelin/lib/interpreter/')
- conn.sudo('cp /opt/zeppelin-' + zeppelin_version + '-bin-netinst/interpreter/md/zeppelin-markdown-*.jar /opt/zeppelin/lib/interpreter/') # necessary when executing paragraph launches java process with "-cp :/opt/zeppelin/lib/interpreter/*:"
- conn.sudo('cp /opt/zeppelin-' + zeppelin_version + '-bin-netinst/interpreter/sh/zeppelin-shell-*.jar /opt/zeppelin/lib/interpreter/')
+ conn.sudo('cp /opt/zeppelin/interpreter/md/zeppelin-markdown-*.jar /opt/zeppelin/lib/interpreter/') # necessary when executing paragraph launches java process with "-cp :/opt/zeppelin/lib/interpreter/*:"
+ conn.sudo('cp /opt/zeppelin/interpreter/sh/zeppelin-shell-*.jar /opt/zeppelin/lib/interpreter/')
except Exception as err:
print('Error:', str(err))
sys.exit(1)
@@ -169,12 +170,14 @@ def configure_local_spark_kernels(args, python_venv_path):
conn.sudo('sed -i "s|OS_USER|' + args.os_user + '|g" /tmp/interpreter.json')
spark_memory = get_spark_memory()
conn.sudo('sed -i "s|DRIVER_MEMORY|{}m|g" /tmp/interpreter.json'.format(spark_memory))
- conn.sudo('sed -i "s|PYTHON_VENV_PATH|{}m|g" /tmp/interpreter.json'.format(python_venv_path))
+ conn.sudo('sed -i "s|PYTHON_VENV_PATH|{}|g" /tmp/interpreter.json'.format(python_venv_path))
update_zeppelin_interpreters(args.multiple_clusters, r_enabled, 'local')
conn.sudo('cp -f /tmp/interpreter.json /opt/zeppelin/conf/interpreter.json')
conn.sudo('chown ' + args.os_user + ':' + args.os_user + ' -R /opt/zeppelin/')
conn.sudo('touch /home/' + args.os_user + '/.ensure_dir/local_spark_kernel_ensured')
+ conn.sudo("systemctl stop zeppelin-notebook")
conn.sudo("systemctl daemon-reload")
+ conn.sudo("systemctl enable zeppelin-notebook")
conn.sudo("systemctl start zeppelin-notebook")