[ https://issues.apache.org/jira/browse/SPARK-18786?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15745867#comment-15745867 ]
Bryan Cutler commented on SPARK-18786: -------------------------------------- The problem is that {{SQLContext.getOrCreate(sc)}} will not reset even though a different {{SparkContext}} is used. With Spark 2.0, we are moving towards {{SparkSession}} so I don't think this is worth fixing. Using {{SparkSession}} like below doesn't seem to have this problem. {noformat} import sys sys.path.insert(1, 'spark/python/') sys.path.insert(1, 'spark/python/lib/py4j-0.9-src.zip') from pyspark.sql import SparkSession spark = SparkSession.builder.getOrCreate() spark.read.json(spark.sparkContext.parallelize(['{{ "name": "Adam" }}'])) spark.stop() spark = SparkSession.builder.getOrCreate() spark.read.json(spark.sparkContext.parallelize(['{{ "name": "Adam" }}'])) {noformat} > pySpark SQLContext.getOrCreate(sc) takes a stopped SparkContext > ------------------------------------------------------------ > > Key: SPARK-18786 > URL: https://issues.apache.org/jira/browse/SPARK-18786 > Project: Spark > Issue Type: Bug > Components: PySpark > Affects Versions: 1.6.0, 2.0.0 > Reporter: Alex Liu > > The following steps reproduce the issue > {code} > import sys > sys.path.insert(1, 'spark/python/') > sys.path.insert(1, 'spark/python/lib/py4j-0.9-src.zip') > from pyspark import SparkContext, SQLContext > sc = SparkContext.getOrCreate() > sqlContext = SQLContext.getOrCreate(sc) > sqlContext.read.json(sc.parallelize(['{{ "name": "Adam" }}'])).show() > sc.stop() > sc = SparkContext.getOrCreate() > sqlContext = SQLContext.getOrCreate(sc) > sqlContext.read.json(sc.parallelize(['{{ "name": "Adam" }}'])).show() > {code} > It produces the following errors after the last command > {code} > >>> sqlContext.read.json(sc.parallelize(['{{ "name": "Adam" }}'])).show() > Traceback (most recent call last): > > File "<stdin>", line 1, in <module> > File "spark/python/pyspark/sql/dataframe.py", line 257, in show > print(self._jdf.showString(n, truncate)) > File "spark/python/lib/py4j-0.9-src.zip/py4j/java_gateway.py", 
line 813, in > __call__ > File "spark/python/pyspark/sql/utils.py", line 45, in deco > return f(*a, **kw) > File "spark/python/lib/py4j-0.9-src.zip/py4j/protocol.py", line 308, in > get_return_value > py4j.protocol.Py4JJavaError: An error occurred while calling o435.showString. > : java.lang.IllegalStateException: Cannot call methods on a stopped > SparkContext. > This stopped SparkContext was created at: > org.apache.spark.api.java.JavaSparkContext.<init>(JavaSparkContext.scala:59) > sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method) > sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62) > sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45) > java.lang.reflect.Constructor.newInstance(Constructor.java:422) > py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:234) > py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:381) > py4j.Gateway.invoke(Gateway.java:214) > py4j.commands.ConstructorCommand.invokeConstructor(ConstructorCommand.java:79) > py4j.commands.ConstructorCommand.execute(ConstructorCommand.java:68) > py4j.GatewayConnection.run(GatewayConnection.java:209) > java.lang.Thread.run(Thread.java:745) > The currently active SparkContext was created at: > org.apache.spark.api.java.JavaSparkContext.<init>(JavaSparkContext.scala:59) > sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method) > sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62) > sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45) > java.lang.reflect.Constructor.newInstance(Constructor.java:422) > py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:234) > py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:381) > py4j.Gateway.invoke(Gateway.java:214) > py4j.commands.ConstructorCommand.invokeConstructor(ConstructorCommand.java:79) > 
py4j.commands.ConstructorCommand.execute(ConstructorCommand.java:68) > py4j.GatewayConnection.run(GatewayConnection.java:209) > java.lang.Thread.run(Thread.java:745) > > at > org.apache.spark.SparkContext.org$apache$spark$SparkContext$$assertNotStopped(SparkContext.scala:106) > at org.apache.spark.SparkContext.broadcast(SparkContext.scala:1325) > at > org.apache.spark.sql.execution.datasources.DataSourceStrategy$.apply(DataSourceStrategy.scala:126) > at > org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$1.apply(QueryPlanner.scala:58) > at > org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$1.apply(QueryPlanner.scala:58) > at scala.collection.Iterator$$anon$13.hasNext(Iterator.scala:371) > at > org.apache.spark.sql.catalyst.planning.QueryPlanner.plan(QueryPlanner.scala:59) > at > org.apache.spark.sql.catalyst.planning.QueryPlanner.planLater(QueryPlanner.scala:54) > at > org.apache.spark.sql.execution.SparkStrategies$BasicOperators$.apply(SparkStrategies.scala:349) > at > org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$1.apply(QueryPlanner.scala:58) > at > org.apache.spark.sql.catalyst.planning.QueryPlanner$$anonfun$1.apply(QueryPlanner.scala:58) > at scala.collection.Iterator$$anon$13.hasNext(Iterator.scala:371) > at > org.apache.spark.sql.catalyst.planning.QueryPlanner.plan(QueryPlanner.scala:59) > at > org.apache.spark.sql.execution.QueryExecution.sparkPlan$lzycompute(QueryExecution.scala:47) > at > org.apache.spark.sql.execution.QueryExecution.sparkPlan(QueryExecution.scala:45) > at > org.apache.spark.sql.execution.QueryExecution.executedPlan$lzycompute(QueryExecution.scala:52) > at > org.apache.spark.sql.execution.QueryExecution.executedPlan(QueryExecution.scala:52) > at org.apache.spark.sql.DataFrame.withCallback(DataFrame.scala:2095) > at org.apache.spark.sql.DataFrame.head(DataFrame.scala:1374) > at org.apache.spark.sql.DataFrame.take(DataFrame.scala:1456) > at org.apache.spark.sql.DataFrame.showString(DataFrame.scala:170) 
> at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:497) > at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:231) > at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:381) > at py4j.Gateway.invoke(Gateway.java:259) > at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:133) > at py4j.commands.CallCommand.execute(CallCommand.java:79) > at py4j.GatewayConnection.run(GatewayConnection.java:209) > at java.lang.Thread.run(Thread.java:745) > >>> sc.parallelize(['{{ "name": "Adam" }}']).collect() > ['{{ "name": "Adam" }}'] > {code} > It doesn't fail in spark-shell. -- This message was sent by Atlassian JIRA (v6.3.4#6332) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org