I am trying to configure the SparkContext to use the Kubernetes master as the 
scheduler. The Jupyter notebook is running on a Windows machine physically 
separate from the Kubernetes cluster.

I'm on Kubernetes 1.21 and Spark 3.1.1 with Hadoop 2.7.

Below is my source:

# Set some vars to specify where the kubernetes master is
kubernetes_master_ip = "15.4.7.11"
kubernetes_master_port = "6443"
spark_master_url = "k8s://https://{0}:{1}".format(kubernetes_master_ip, 
kubernetes_master_port)

import pyspark

# Wire up the SparkConf object
sparkConf = pyspark.SparkConf()
sparkConf.setMaster(spark_master_url)
sparkConf.setAppName("spark")

sparkConf.set("spark.kubernetes.container.image", 
"kublr/spark-py:2.4.0-hadoop-2.7")
sparkConf.set("spark.kubernetes.namespace", "spark")
sparkConf.set("spark.kubernetes.pyspark.pythonVersion", "3")
sparkConf.set("spark.kubernetes.authenticate.driver.serviceAccountName", 
"spark")
sparkConf.set("spark.kubernetes.authenticate.serviceAccountName", "spark")
sparkConf.set("spark.kubernetes.authenticate.submission.caCertFile", 
"api_server.crt")
sparkConf.set("spark.kubernetes.authenticate.oauthTokenFile", 
"service_account.token")

sparkConf.set("spark.executor.instances", "3")
sparkConf.set("spark.executor.cores", "2")
sparkConf.set("spark.executor.memory", "512m")

sparkConf.set("spark.driver.memory", "512m")

# Create the spark context
from pyspark.sql import SparkSession
spark = SparkSession.builder.config(conf=sparkConf).getOrCreate()
sc = spark.sparkContext
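
One thing that might be worth ruling out first is whether the API server, CA 
cert, and token even work from the Windows machine. Here is a quick sanity 
check I put together (assuming the requests package is installed and both 
credential files sit in the notebook's working directory, matching the conf 
above):

# Sanity check, independent of Spark: hit the Kubernetes API directly
# with the same CA cert and token that the conf above hands to Spark.
import requests

with open("service_account.token") as f:
    token = f.read().strip()

resp = requests.get(
    "https://{0}:{1}/api/v1/namespaces/spark/pods".format(
        kubernetes_master_ip, kubernetes_master_port
    ),
    headers={"Authorization": "Bearer " + token},
    verify="api_server.crt",  # CA cert used to validate the API server
)
# 200 means the endpoint and credentials are usable; 401/403 would point
# at the token or the service account's RBAC rather than at Spark itself.
print(resp.status_code)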

Here is the error:

ERROR:root:Exception while sending command.
Traceback (most recent call last):
  File "C:\spark\spark-3.1.1-bin-hadoop2.7\python\lib\py4j-0.10.9-src.zip\py4j\java_gateway.py", line 1200, in send_command
    answer = smart_decode(self.stream.readline()[:-1])
  File "c:\program files\python\python 3.8\lib\socket.py", line 669, in readinto
    return self._sock.recv_into(b)
ConnectionResetError: [WinError 10054] An existing connection was forcibly closed by the remote host

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "C:\spark\spark-3.1.1-bin-hadoop2.7\python\lib\py4j-0.10.9-src.zip\py4j\java_gateway.py", line 1033, in send_command
    response = connection.send_command(command)
  File "C:\spark\spark-3.1.1-bin-hadoop2.7\python\lib\py4j-0.10.9-src.zip\py4j\java_gateway.py", line 1211, in send_command
    raise Py4JNetworkError(
py4j.protocol.Py4JNetworkError: Error while receiving
ERROR:py4j.java_gateway:An error occurred while trying to connect to the Java server (127.0.0.1:27279)
Traceback (most recent call last):
  File "c:\program files\python\python 3.8\lib\site-packages\IPython\core\interactiveshell.py", line 3331, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-8-c2f5d539b7f2>", line 2, in <module>
    spark = SparkSession.builder.config(conf=sparkConf).getOrCreate()
  File "C:\spark\spark-3.1.1-bin-hadoop2.7\python\pyspark\sql\session.py", line 228, in getOrCreate
    sc = SparkContext.getOrCreate(sparkConf)
  File "C:\spark\spark-3.1.1-bin-hadoop2.7\python\pyspark\context.py", line 384, in getOrCreate
    SparkContext(conf=conf or SparkConf())
  File "C:\spark\spark-3.1.1-bin-hadoop2.7\python\pyspark\context.py", line 146, in __init__
    self._do_init(master, appName, sparkHome, pyFiles, environment, batchSize, serializer,
  File "C:\spark\spark-3.1.1-bin-hadoop2.7\python\pyspark\context.py", line 209, in _do_init
    self._jsc = jsc or self._initialize_context(self._conf._jconf)
  File "C:\spark\spark-3.1.1-bin-hadoop2.7\python\pyspark\context.py", line 321, in _initialize_context
    return self._jvm.JavaSparkContext(jconf)
  File "C:\spark\spark-3.1.1-bin-hadoop2.7\python\lib\py4j-0.10.9-src.zip\py4j\java_gateway.py", line 1568, in __call__
    return_value = get_return_value(
  File "C:\spark\spark-3.1.1-bin-hadoop2.7\python\lib\py4j-0.10.9-src.zip\py4j\protocol.py", line 334, in get_return_value
    raise Py4JError(
py4j.protocol.Py4JError: An error occurred while calling None.org.apache.spark.api.java.JavaSparkContext
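
If I read the traceback right, the Py4J errors only say that the local driver 
JVM died while constructing the JavaSparkContext, so the underlying cause is 
on the JVM side rather than in Python. One mismatch I do notice in my own 
conf: the client install is Spark 3.1.1, but the container image is tagged 
2.4.0, and as far as I know the executor image should carry the same Spark 
version as the driver. Checking the client side is simple (pyspark exposes 
its version directly):

# Print the local client's Spark version; the executor image above
# (kublr/spark-py:2.4.0-hadoop-2.7) is expected to match it.
import pyspark
print(pyspark.__version__)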

I have tried restarting the notebook, etc.

Can anyone point me in the right direction?
