realknorke commented on PR #4586:
URL: https://github.com/apache/zeppelin/pull/4586#issuecomment-1525682236
Thanks for the pull request. It's possible to connect to a Spark 3.4.0
cluster, but Spark Connect is not working.
```
%spark-connect
spark.read.option("basePath",
"s3a://bucket/prefix").orc("s3a://bucket/prefix/day=2023-*/").head()
```
leads to
```
org.apache.zeppelin.interpreter.InterpreterException: java.io.IOException:
Fail to launch interpreter process:
Interpreter download command: java -Dfile.encoding=UTF-8
-Dlog4j.configuration=file:///opt/zeppelin//conf/log4j.properties
-Dlog4j.configurationFile=file:///opt/zeppelin//conf/log4j2.properties
-Dzeppelin.log.file=/opt/zeppelin//logs/zeppelin-interpreter-spark-connect-shared_process--amos.log
-cp
:/opt/zeppelin//interpreter/spark/*:::/opt/zeppelin//interpreter/zeppelin-interpreter-shaded-0.11.0-SNAPSHOT.jar::/opt/zeppelin//interpreter/spark/spark-interpreter-0.11.0-SNAPSHOT.jar:
org.apache.zeppelin.interpreter.remote.RemoteInterpreterDownloader
192.168.50.6 37391 spark-connect /opt/zeppelin//local-repo/spark-connect
[INFO] Interpreter launch command: /opt/spark/bin/spark-submit --class
org.apache.zeppelin.interpreter.remote.RemoteInterpreterServer
--driver-class-path
:/opt/zeppelin//local-repo/spark-connect/*:/opt/zeppelin//interpreter/spark/*:::/opt/zeppelin//interpreter/zeppelin-interpreter-shaded-0.11.0-SNAPSHOT.jar::/opt/zeppelin//interpreter/spark/spark-interpreter-0.11.0-SNAPSHOT.jar:
--driver-java-options -Dfile.encoding=UTF-8
-Dlog4j.configuration=file:///opt/zeppelin//conf/log4j.properties
-Dlog4j.configurationFile=file:///opt/zeppelin//conf/log4j2.properties
-Dzeppelin.log.file=/opt/zeppelin//logs/zeppelin-interpreter-spark-connect-shared_process--amos.log
--conf spark.executor.memory=1g --conf spark.master=sc://localhost:15002
--conf spark.driver.memory=1g --conf spark.driver.cores=1 --conf
spark.jars.packages=org.apache.spark:spark-connect_2.12:3.4.0 --conf
spark.executor.cores=1 --conf spark.app.name=spark-connect-shared_process
--conf spark.executor.instances=2 --conf
spark.webui.yarn.useProxy=false
/opt/zeppelin//interpreter/spark/spark-interpreter-0.11.0-SNAPSHOT.jar:
192.168.50.6 37391 spark-connect-shared_process :
Exception in thread "main" org.apache.spark.SparkException: Master must
either be yarn or start with spark, mesos, k8s, or local
at org.apache.spark.deploy.SparkSubmit.error(SparkSubmit.scala:1038)
at
org.apache.spark.deploy.SparkSubmit.prepareSubmitEnvironment(SparkSubmit.scala:254)
at
org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:955)
at
org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:192)
at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:215)
at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:91)
at
org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1111)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1120)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
at
org.apache.zeppelin.interpreter.remote.RemoteInterpreter.open(RemoteInterpreter.java:129)
at
org.apache.zeppelin.interpreter.remote.RemoteInterpreter.getFormType(RemoteInterpreter.java:271)
at org.apache.zeppelin.notebook.Paragraph.jobRun(Paragraph.java:429)
at org.apache.zeppelin.notebook.Paragraph.jobRun(Paragraph.java:69)
at org.apache.zeppelin.scheduler.Job.run(Job.java:172)
at
org.apache.zeppelin.scheduler.AbstractScheduler.runJob(AbstractScheduler.java:132)
at
org.apache.zeppelin.scheduler.RemoteScheduler$JobRunner.run(RemoteScheduler.java:182)
at
java.base/java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:515)
at java.base/java.util.concurrent.FutureTask.run(FutureTask.java:264)
at
java.base/java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:304)
at
java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1130)
at
java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:630)
at java.base/java.lang.Thread.run(Thread.java:832)
Caused by: java.io.IOException: Fail to launch interpreter process:
Interpreter download command: java -Dfile.encoding=UTF-8
-Dlog4j.configuration=file:///opt/zeppelin//conf/log4j.properties
-Dlog4j.configurationFile=file:///opt/zeppelin//conf/log4j2.properties
-Dzeppelin.log.file=/opt/zeppelin//logs/zeppelin-interpreter-spark-connect-shared_process--amos.log
-cp
:/opt/zeppelin//interpreter/spark/*:::/opt/zeppelin//interpreter/zeppelin-interpreter-shaded-0.11.0-SNAPSHOT.jar::/opt/zeppelin//interpreter/spark/spark-interpreter-0.11.0-SNAPSHOT.jar:
org.apache.zeppelin.interpreter.remote.RemoteInterpreterDownloader
192.168.50.6 37391 spark-connect /opt/zeppelin//local-repo/spark-connect
[INFO] Interpreter launch command: /opt/spark/bin/spark-submit --class
org.apache.zeppelin.interpreter.remote.RemoteInterpreterServer
--driver-class-path
:/opt/zeppelin//local-repo/spark-connect/*:/opt/zeppelin//interpreter/spark/*:::/opt/zeppelin//interpreter/zeppelin-interpreter-shaded-0.11.0-SNAPSHOT.jar::/opt/zeppelin//interpreter/spark/spark-interpreter-0.11.0-SNAPSHOT.jar:
--driver-java-options -Dfile.encoding=UTF-8
-Dlog4j.configuration=file:///opt/zeppelin//conf/log4j.properties
-Dlog4j.configurationFile=file:///opt/zeppelin//conf/log4j2.properties
-Dzeppelin.log.file=/opt/zeppelin//logs/zeppelin-interpreter-spark-connect-shared_process--amos.log
--conf spark.executor.memory=1g --conf spark.master=sc://localhost:15002
--conf spark.driver.memory=1g --conf spark.driver.cores=1 --conf
spark.jars.packages=org.apache.spark:spark-connect_2.12:3.4.0 --conf
spark.executor.cores=1 --conf spark.app.name=spark-connect-shared_process
--conf spark.executor.instances=2 --conf
spark.webui.yarn.useProxy=false
/opt/zeppelin//interpreter/spark/spark-interpreter-0.11.0-SNAPSHOT.jar:
192.168.50.6 37391 spark-connect-shared_process :
Exception in thread "main" org.apache.spark.SparkException: Master must
either be yarn or start with spark, mesos, k8s, or local
at org.apache.spark.deploy.SparkSubmit.error(SparkSubmit.scala:1038)
at
org.apache.spark.deploy.SparkSubmit.prepareSubmitEnvironment(SparkSubmit.scala:254)
at
org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:955)
at
org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:192)
at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:215)
at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:91)
at
org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1111)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1120)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
at
org.apache.zeppelin.interpreter.remote.ExecRemoteInterpreterProcess.start(ExecRemoteInterpreterProcess.java:97)
at
org.apache.zeppelin.interpreter.ManagedInterpreterGroup.getOrCreateInterpreterProcess(ManagedInterpreterGroup.java:68)
at
org.apache.zeppelin.interpreter.remote.RemoteInterpreter.getOrCreateInterpreterProcess(RemoteInterpreter.java:104)
at
org.apache.zeppelin.interpreter.remote.RemoteInterpreter.internal_create(RemoteInterpreter.java:154)
at
org.apache.zeppelin.interpreter.remote.RemoteInterpreter.open(RemoteInterpreter.java:126)
... 12 more
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]