[ https://issues.apache.org/jira/browse/SPARK-3457?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Shay Rojansky updated SPARK-3457: --------------------------------- Description: Just downloaded Spark 1.1.0-rc4. Launching pyspark for the very first time in yarn-client mode (no additional params or anything), I got the exception below. Rerunning pyspark 5 times afterwards did not reproduce the issue. {code} 14/09/09 18:07:58 INFO YarnClientSchedulerBackend: Application report from ASM: appMasterRpcPort: 0 appStartTime: 1410275267606 yarnAppState: RUNNING 14/09/09 18:07:58 INFO YarnClientSchedulerBackend: Add WebUI Filter. org.apache.hadoop.yarn.server.webproxy.amfilter.AmIpFilter, PROXY_HOST=master. grid.eaglerd.local,PROXY_URI_BASE=http://master.grid.eaglerd.local:8088/proxy/application_1410268447887_0011, /proxy/application_1410268447887_0011 Traceback (most recent call last): File "/opt/spark/python/pyspark/shell.py", line 44, in <module> 14/09/09 18:07:58 INFO JettyUtils: Adding filter: org.apache.hadoop.yarn.server.webproxy.amfilter.AmIpFilter sc = SparkContext(appName="PySparkShell", pyFiles=add_files) File "/opt/spark/python/pyspark/context.py", line 107, in __init__ conf) File "/opt/spark/python/pyspark/context.py", line 155, in _do_init self._jsc = self._initialize_context(self._conf._jconf) File "/opt/spark/python/pyspark/context.py", line 201, in _initialize_context return self._jvm.JavaSparkContext(jconf) File "/opt/spark/python/lib/py4j-0.8.2.1-src.zip/py4j/java_gateway.py", line 701, in __call__ File "/opt/spark/python/lib/py4j-0.8.2.1-src.zip/py4j/protocol.py", line 300, in get_return_value py4j.protocol.Py4JJavaError: An error occurred while calling None.org.apache.spark.api.java.JavaSparkContext. : java.util.ConcurrentModificationException at java.util.Hashtable$Enumerator.next(Hashtable.java:1167) at scala.collection.convert.Wrappers$JPropertiesWrapper$$anon$3.next(Wrappers.scala:458) at scala.collection.convert.Wrappers$JPropertiesWrapper$$anon$3.next(Wrappers.scala:454) at scala.collection.Iterator$class.toStream(Iterator.scala:1143) at scala.collection.AbstractIterator.toStream(Iterator.scala:1157) at scala.collection.Iterator$$anonfun$toStream$1.apply(Iterator.scala:1143) at scala.collection.Iterator$$anonfun$toStream$1.apply(Iterator.scala:1143) at scala.collection.immutable.Stream$Cons.tail(Stream.scala:1085) at scala.collection.immutable.Stream$Cons.tail(Stream.scala:1077) at scala.collection.immutable.Stream$$anonfun$filteredTail$1.apply(Stream.scala:1149) at scala.collection.immutable.Stream$$anonfun$filteredTail$1.apply(Stream.scala:1149) at scala.collection.immutable.Stream$Cons.tail(Stream.scala:1085) at scala.collection.immutable.Stream$Cons.tail(Stream.scala:1077) at scala.collection.immutable.Stream.length(Stream.scala:284) at scala.collection.SeqLike$class.sorted(SeqLike.scala:608) at scala.collection.AbstractSeq.sorted(Seq.scala:40) at org.apache.spark.SparkEnv$.environmentDetails(SparkEnv.scala:324) at org.apache.spark.SparkContext.postEnvironmentUpdate(SparkContext.scala:1297) at org.apache.spark.SparkContext.<init>(SparkContext.scala:334) at org.apache.spark.api.java.JavaSparkContext.<init>(JavaSparkContext.scala:53) at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method) at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:57) at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45) at java.lang.reflect.Constructor.newInstance(Constructor.java:526) at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:234) at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:379) at py4j.Gateway.invoke(Gateway.java:214) at py4j.commands.ConstructorCommand.invokeConstructor(ConstructorCommand.java:79) at py4j.commands.ConstructorCommand.execute(ConstructorCommand.java:68) at py4j.GatewayConnection.run(GatewayConnection.java:207) at java.lang.Thread.run(Thread.java:745) {code} was: Just downloaded Spark 1.1.0-rc4. Launching pyspark for the very first time in yarn-client mode (no additional params or anything), I got the exception below. Rerunning pyspark 5 times afterwards did not reproduce the issue. 14/09/09 18:07:58 INFO YarnClientSchedulerBackend: Application report from ASM: appMasterRpcPort: 0 appStartTime: 1410275267606 yarnAppState: RUNNING 14/09/09 18:07:58 INFO YarnClientSchedulerBackend: Add WebUI Filter. org.apache.hadoop.yarn.server.webproxy.amfilter.AmIpFilter, PROXY_HOST=master. grid.eaglerd.local,PROXY_URI_BASE=http://master.grid.eaglerd.local:8088/proxy/application_1410268447887_0011, /proxy/application_1410268447887_0011 Traceback (most recent call last): File "/opt/spark/python/pyspark/shell.py", line 44, in <module> 14/09/09 18:07:58 INFO JettyUtils: Adding filter: org.apache.hadoop.yarn.server.webproxy.amfilter.AmIpFilter sc = SparkContext(appName="PySparkShell", pyFiles=add_files) File "/opt/spark/python/pyspark/context.py", line 107, in __init__ conf) File "/opt/spark/python/pyspark/context.py", line 155, in _do_init self._jsc = self._initialize_context(self._conf._jconf) File "/opt/spark/python/pyspark/context.py", line 201, in _initialize_context return self._jvm.JavaSparkContext(jconf) File "/opt/spark/python/lib/py4j-0.8.2.1-src.zip/py4j/java_gateway.py", line 701, in __call__ File "/opt/spark/python/lib/py4j-0.8.2.1-src.zip/py4j/protocol.py", line 300, in get_return_value py4j.protocol.Py4JJavaError: An error occurred while calling None.org.apache.spark.api.java.JavaSparkContext. : java.util.ConcurrentModificationException at java.util.Hashtable$Enumerator.next(Hashtable.java:1167) at scala.collection.convert.Wrappers$JPropertiesWrapper$$anon$3.next(Wrappers.scala:458) at scala.collection.convert.Wrappers$JPropertiesWrapper$$anon$3.next(Wrappers.scala:454) at scala.collection.Iterator$class.toStream(Iterator.scala:1143) at scala.collection.AbstractIterator.toStream(Iterator.scala:1157) at scala.collection.Iterator$$anonfun$toStream$1.apply(Iterator.scala:1143) at scala.collection.Iterator$$anonfun$toStream$1.apply(Iterator.scala:1143) at scala.collection.immutable.Stream$Cons.tail(Stream.scala:1085) at scala.collection.immutable.Stream$Cons.tail(Stream.scala:1077) at scala.collection.immutable.Stream$$anonfun$filteredTail$1.apply(Stream.scala:1149) at scala.collection.immutable.Stream$$anonfun$filteredTail$1.apply(Stream.scala:1149) at scala.collection.immutable.Stream$Cons.tail(Stream.scala:1085) at scala.collection.immutable.Stream$Cons.tail(Stream.scala:1077) at scala.collection.immutable.Stream.length(Stream.scala:284) at scala.collection.SeqLike$class.sorted(SeqLike.scala:608) at scala.collection.AbstractSeq.sorted(Seq.scala:40) at org.apache.spark.SparkEnv$.environmentDetails(SparkEnv.scala:324) at org.apache.spark.SparkContext.postEnvironmentUpdate(SparkContext.scala:1297) at org.apache.spark.SparkContext.<init>(SparkContext.scala:334) at org.apache.spark.api.java.JavaSparkContext.<init>(JavaSparkContext.scala:53) at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method) at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:57) at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45) at java.lang.reflect.Constructor.newInstance(Constructor.java:526) at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:234) at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:379) at py4j.Gateway.invoke(Gateway.java:214) at py4j.commands.ConstructorCommand.invokeConstructor(ConstructorCommand.java:79) at py4j.commands.ConstructorCommand.execute(ConstructorCommand.java:68) at py4j.GatewayConnection.run(GatewayConnection.java:207) at java.lang.Thread.run(Thread.java:745) > ConcurrentModificationException starting up pyspark > --------------------------------------------------- > > Key: SPARK-3457 > URL: https://issues.apache.org/jira/browse/SPARK-3457 > Project: Spark > Issue Type: Bug > Components: Spark Core > Affects Versions: 1.1.0 > Environment: Hadoop 2.3 (CDH 5.1) on Ubuntu precise > Reporter: Shay Rojansky > > Just downloaded Spark 1.1.0-rc4. Launching pyspark for the very first time in > yarn-client mode (no additional params or anything), I got the exception > below. Rerunning pyspark 5 times afterwards did not reproduce the issue. > {code} > 14/09/09 18:07:58 INFO YarnClientSchedulerBackend: Application report from > ASM: > appMasterRpcPort: 0 > appStartTime: 1410275267606 > yarnAppState: RUNNING > 14/09/09 18:07:58 INFO YarnClientSchedulerBackend: Add WebUI Filter. > org.apache.hadoop.yarn.server.webproxy.amfilter.AmIpFilter, PROXY_HOST=master. > grid.eaglerd.local,PROXY_URI_BASE=http://master.grid.eaglerd.local:8088/proxy/application_1410268447887_0011, > /proxy/application_1410268447887_0011 > Traceback (most recent call last): > File "/opt/spark/python/pyspark/shell.py", line 44, in <module> > 14/09/09 18:07:58 INFO JettyUtils: Adding filter: > org.apache.hadoop.yarn.server.webproxy.amfilter.AmIpFilter > sc = SparkContext(appName="PySparkShell", pyFiles=add_files) > File "/opt/spark/python/pyspark/context.py", line 107, in __init__ > conf) > File "/opt/spark/python/pyspark/context.py", line 155, in _do_init > self._jsc = self._initialize_context(self._conf._jconf) > File "/opt/spark/python/pyspark/context.py", line 201, in > _initialize_context > return self._jvm.JavaSparkContext(jconf) > File "/opt/spark/python/lib/py4j-0.8.2.1-src.zip/py4j/java_gateway.py", > line 701, in __call__ > File "/opt/spark/python/lib/py4j-0.8.2.1-src.zip/py4j/protocol.py", line > 300, in get_return_value > py4j.protocol.Py4JJavaError: An error occurred while calling > None.org.apache.spark.api.java.JavaSparkContext. > : java.util.ConcurrentModificationException > at java.util.Hashtable$Enumerator.next(Hashtable.java:1167) > at > scala.collection.convert.Wrappers$JPropertiesWrapper$$anon$3.next(Wrappers.scala:458) > at > scala.collection.convert.Wrappers$JPropertiesWrapper$$anon$3.next(Wrappers.scala:454) > at scala.collection.Iterator$class.toStream(Iterator.scala:1143) > at scala.collection.AbstractIterator.toStream(Iterator.scala:1157) > at > scala.collection.Iterator$$anonfun$toStream$1.apply(Iterator.scala:1143) > at > scala.collection.Iterator$$anonfun$toStream$1.apply(Iterator.scala:1143) > at scala.collection.immutable.Stream$Cons.tail(Stream.scala:1085) > at scala.collection.immutable.Stream$Cons.tail(Stream.scala:1077) > at > scala.collection.immutable.Stream$$anonfun$filteredTail$1.apply(Stream.scala:1149) > at > scala.collection.immutable.Stream$$anonfun$filteredTail$1.apply(Stream.scala:1149) > at scala.collection.immutable.Stream$Cons.tail(Stream.scala:1085) > at scala.collection.immutable.Stream$Cons.tail(Stream.scala:1077) > at scala.collection.immutable.Stream.length(Stream.scala:284) > at scala.collection.SeqLike$class.sorted(SeqLike.scala:608) > at scala.collection.AbstractSeq.sorted(Seq.scala:40) > at org.apache.spark.SparkEnv$.environmentDetails(SparkEnv.scala:324) > at > org.apache.spark.SparkContext.postEnvironmentUpdate(SparkContext.scala:1297) > at org.apache.spark.SparkContext.<init>(SparkContext.scala:334) > at > org.apache.spark.api.java.JavaSparkContext.<init>(JavaSparkContext.scala:53) > at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native > Method) > at > sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:57) > at > sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45) > at java.lang.reflect.Constructor.newInstance(Constructor.java:526) > at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:234) > at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:379) > at py4j.Gateway.invoke(Gateway.java:214) > at > py4j.commands.ConstructorCommand.invokeConstructor(ConstructorCommand.java:79) > at > py4j.commands.ConstructorCommand.execute(ConstructorCommand.java:68) > at py4j.GatewayConnection.run(GatewayConnection.java:207) > at java.lang.Thread.run(Thread.java:745) > {code} -- This message was sent by Atlassian JIRA (v6.3.4#6332) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org