[jira] [Commented] (SPARK-1097) ConcurrentModificationException
[ https://issues.apache.org/jira/browse/SPARK-1097?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanelfocusedCommentId=14075216#comment-14075216 ] Patrick Wendell commented on SPARK-1097: A follow up to this fix is in Spark 1.0.2: https://github.com/apache/spark/pull/1409/files ConcurrentModificationException --- Key: SPARK-1097 URL: https://issues.apache.org/jira/browse/SPARK-1097 Project: Spark Issue Type: Bug Components: Spark Core Affects Versions: 0.9.0 Reporter: Fabrizio Milo Assignee: Raymond Liu Fix For: 1.0.1, 1.1.0, 1.0.2 Attachments: nravi_Conf_Spark-1388.patch {noformat} 14/02/16 08:18:45 WARN TaskSetManager: Loss was due to java.util.ConcurrentModificationException java.util.ConcurrentModificationException at java.util.HashMap$HashIterator.nextEntry(HashMap.java:926) at java.util.HashMap$KeyIterator.next(HashMap.java:960) at java.util.AbstractCollection.addAll(AbstractCollection.java:341) at java.util.HashSet.init(HashSet.java:117) at org.apache.hadoop.conf.Configuration.init(Configuration.java:554) at org.apache.hadoop.mapred.JobConf.init(JobConf.java:439) at org.apache.spark.rdd.HadoopRDD.getJobConf(HadoopRDD.scala:110) at org.apache.spark.rdd.HadoopRDD$$anon$1.init(HadoopRDD.scala:154) at org.apache.spark.rdd.HadoopRDD.compute(HadoopRDD.scala:149) at org.apache.spark.rdd.HadoopRDD.compute(HadoopRDD.scala:64) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:241) at org.apache.spark.rdd.RDD.iterator(RDD.scala:232) at org.apache.spark.rdd.MappedRDD.compute(MappedRDD.scala:31) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:241) at org.apache.spark.rdd.RDD.iterator(RDD.scala:232) at org.apache.spark.rdd.UnionPartition.iterator(UnionRDD.scala:32) at org.apache.spark.rdd.UnionRDD.compute(UnionRDD.scala:72) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:241) at org.apache.spark.rdd.RDD.iterator(RDD.scala:232) at org.apache.spark.rdd.FlatMappedRDD.compute(FlatMappedRDD.scala:33) at 
org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:241) at org.apache.spark.rdd.RDD.iterator(RDD.scala:232) at org.apache.spark.rdd.MappedRDD.compute(MappedRDD.scala:31) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:241) at org.apache.spark.rdd.RDD.iterator(RDD.scala:232) at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:161) at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:102) at org.apache.spark.scheduler.Task.run(Task.scala:53) at org.apache.spark.executor.Executor$TaskRunner$$anonfun$run$1.apply$mcV$sp(Executor.scala:213) at org.apache.spark.deploy.SparkHadoopUtil.runAsUser(SparkHadoopUtil.scala:49) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:178) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) at java.lang.Thread.run(Thread.java:744) {noformat} -- This message was sent by Atlassian JIRA (v6.2#6252)
[jira] [Commented] (SPARK-1097) ConcurrentModificationException
[ https://issues.apache.org/jira/browse/SPARK-1097?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14048653#comment-14048653 ] Raymond Liu commented on SPARK-1097: Submitted another PR to actually (hopefully) fix this problem: https://github.com/apache/spark/pull/1273 ConcurrentModificationException --- Key: SPARK-1097 URL: https://issues.apache.org/jira/browse/SPARK-1097 Project: Spark Issue Type: Bug Components: Spark Core Affects Versions: 0.9.0 Reporter: Fabrizio Milo Assignee: Nishkam Ravi Fix For: 1.0.1, 1.1.0 Attachments: nravi_Conf_Spark-1388.patch {noformat} 14/02/16 08:18:45 WARN TaskSetManager: Loss was due to java.util.ConcurrentModificationException java.util.ConcurrentModificationException at java.util.HashMap$HashIterator.nextEntry(HashMap.java:926) at java.util.HashMap$KeyIterator.next(HashMap.java:960) at java.util.AbstractCollection.addAll(AbstractCollection.java:341) at java.util.HashSet.init(HashSet.java:117) at org.apache.hadoop.conf.Configuration.init(Configuration.java:554) at org.apache.hadoop.mapred.JobConf.init(JobConf.java:439) at org.apache.spark.rdd.HadoopRDD.getJobConf(HadoopRDD.scala:110) at org.apache.spark.rdd.HadoopRDD$$anon$1.init(HadoopRDD.scala:154) at org.apache.spark.rdd.HadoopRDD.compute(HadoopRDD.scala:149) at org.apache.spark.rdd.HadoopRDD.compute(HadoopRDD.scala:64) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:241) at org.apache.spark.rdd.RDD.iterator(RDD.scala:232) at org.apache.spark.rdd.MappedRDD.compute(MappedRDD.scala:31) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:241) at org.apache.spark.rdd.RDD.iterator(RDD.scala:232) at org.apache.spark.rdd.UnionPartition.iterator(UnionRDD.scala:32) at org.apache.spark.rdd.UnionRDD.compute(UnionRDD.scala:72) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:241) at org.apache.spark.rdd.RDD.iterator(RDD.scala:232) at org.apache.spark.rdd.FlatMappedRDD.compute(FlatMappedRDD.scala:33) at 
org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:241) at org.apache.spark.rdd.RDD.iterator(RDD.scala:232) at org.apache.spark.rdd.MappedRDD.compute(MappedRDD.scala:31) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:241) at org.apache.spark.rdd.RDD.iterator(RDD.scala:232) at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:161) at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:102) at org.apache.spark.scheduler.Task.run(Task.scala:53) at org.apache.spark.executor.Executor$TaskRunner$$anonfun$run$1.apply$mcV$sp(Executor.scala:213) at org.apache.spark.deploy.SparkHadoopUtil.runAsUser(SparkHadoopUtil.scala:49) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:178) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) at java.lang.Thread.run(Thread.java:744) {noformat} -- This message was sent by Atlassian JIRA (v6.2#6252)
[jira] [Commented] (SPARK-1097) ConcurrentModificationException
[ https://issues.apache.org/jira/browse/SPARK-1097?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanelfocusedCommentId=14020278#comment-14020278 ] Jim Blomo commented on SPARK-1097: -- FYI still seeing this on spark 1.0, Hadoop 2.4 {code:java} java.util.ConcurrentModificationException (java.util.ConcurrentModificationException) java.util.HashMap$HashIterator.nextEntry(HashMap.java:922) java.util.HashMap$KeyIterator.next(HashMap.java:956) java.util.AbstractCollection.addAll(AbstractCollection.java:341) java.util.HashSet.init(HashSet.java:117) org.apache.hadoop.conf.Configuration.init(Configuration.java:671) com.amazon.ws.emr.hadoop.fs.EmrFileSystem.initialize(EmrFileSystem.java:98) org.apache.hadoop.fs.FileSystem.createFileSystem(FileSystem.java:2402) org.apache.hadoop.fs.FileSystem.access$200(FileSystem.java:89) org.apache.hadoop.fs.FileSystem$Cache.getInternal(FileSystem.java:2436) org.apache.hadoop.fs.FileSystem$Cache.get(FileSystem.java:2418) org.apache.hadoop.fs.FileSystem.get(FileSystem.java:373) org.apache.hadoop.fs.Path.getFileSystem(Path.java:296) org.apache.hadoop.mapred.LineRecordReader.init(LineRecordReader.java:107) org.apache.hadoop.mapred.TextInputFormat.getRecordReader(TextInputFormat.java:67) org.apache.spark.rdd.HadoopRDD$$anon$1.init(HadoopRDD.scala:190) org.apache.spark.rdd.HadoopRDD.compute(HadoopRDD.scala:181) org.apache.spark.rdd.HadoopRDD.compute(HadoopRDD.scala:93) org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:262) org.apache.spark.rdd.RDD.iterator(RDD.scala:229) org.apache.spark.rdd.MappedRDD.compute(MappedRDD.scala:31) org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:262) org.apache.spark.rdd.RDD.iterator(RDD.scala:229) org.apache.spark.api.python.PythonRDD$WriterThread$$anonfun$run$1.apply$mcV$sp(PythonRDD.scala:200) org.apache.spark.api.python.PythonRDD$WriterThread$$anonfun$run$1.apply(PythonRDD.scala:175) 
org.apache.spark.api.python.PythonRDD$WriterThread$$anonfun$run$1.apply(PythonRDD.scala:175) org.apache.spark.util.Utils$.logUncaughtExceptions(Utils.scala:1160) org.apache.spark.api.python.PythonRDD$WriterThread.run(PythonRDD.scala:174) {code} ConcurrentModificationException --- Key: SPARK-1097 URL: https://issues.apache.org/jira/browse/SPARK-1097 Project: Spark Issue Type: Bug Components: Spark Core Affects Versions: 0.9.0 Reporter: Fabrizio Milo Attachments: nravi_Conf_Spark-1388.patch {noformat} 14/02/16 08:18:45 WARN TaskSetManager: Loss was due to java.util.ConcurrentModificationException java.util.ConcurrentModificationException at java.util.HashMap$HashIterator.nextEntry(HashMap.java:926) at java.util.HashMap$KeyIterator.next(HashMap.java:960) at java.util.AbstractCollection.addAll(AbstractCollection.java:341) at java.util.HashSet.init(HashSet.java:117) at org.apache.hadoop.conf.Configuration.init(Configuration.java:554) at org.apache.hadoop.mapred.JobConf.init(JobConf.java:439) at org.apache.spark.rdd.HadoopRDD.getJobConf(HadoopRDD.scala:110) at org.apache.spark.rdd.HadoopRDD$$anon$1.init(HadoopRDD.scala:154) at org.apache.spark.rdd.HadoopRDD.compute(HadoopRDD.scala:149) at org.apache.spark.rdd.HadoopRDD.compute(HadoopRDD.scala:64) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:241) at org.apache.spark.rdd.RDD.iterator(RDD.scala:232) at org.apache.spark.rdd.MappedRDD.compute(MappedRDD.scala:31) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:241) at org.apache.spark.rdd.RDD.iterator(RDD.scala:232) at org.apache.spark.rdd.UnionPartition.iterator(UnionRDD.scala:32) at org.apache.spark.rdd.UnionRDD.compute(UnionRDD.scala:72) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:241) at org.apache.spark.rdd.RDD.iterator(RDD.scala:232) at org.apache.spark.rdd.FlatMappedRDD.compute(FlatMappedRDD.scala:33) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:241) at org.apache.spark.rdd.RDD.iterator(RDD.scala:232) at 
org.apache.spark.rdd.MappedRDD.compute(MappedRDD.scala:31) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:241) at org.apache.spark.rdd.RDD.iterator(RDD.scala:232) at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:161) at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:102) at org.apache.spark.scheduler.Task.run(Task.scala:53) at org.apache.spark.executor.Executor$TaskRunner$$anonfun$run$1.apply$mcV$sp(Executor.scala:213) at org.apache.spark.deploy.SparkHadoopUtil.runAsUser(SparkHadoopUtil.scala:49) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:178) at
[jira] [Commented] (SPARK-1097) ConcurrentModificationException
[ https://issues.apache.org/jira/browse/SPARK-1097?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14020336#comment-14020336 ] Tsuyoshi OZAWA commented on SPARK-1097: --- [~jblomo], thank you for reporting. This issue is fixed in the next minor Hadoop release, 2.4.1. Note that 2.4.0 doesn't include the fix. ConcurrentModificationException --- Key: SPARK-1097 URL: https://issues.apache.org/jira/browse/SPARK-1097 Project: Spark Issue Type: Bug Components: Spark Core Affects Versions: 0.9.0 Reporter: Fabrizio Milo Attachments: nravi_Conf_Spark-1388.patch {noformat} 14/02/16 08:18:45 WARN TaskSetManager: Loss was due to java.util.ConcurrentModificationException java.util.ConcurrentModificationException at java.util.HashMap$HashIterator.nextEntry(HashMap.java:926) at java.util.HashMap$KeyIterator.next(HashMap.java:960) at java.util.AbstractCollection.addAll(AbstractCollection.java:341) at java.util.HashSet.init(HashSet.java:117) at org.apache.hadoop.conf.Configuration.init(Configuration.java:554) at org.apache.hadoop.mapred.JobConf.init(JobConf.java:439) at org.apache.spark.rdd.HadoopRDD.getJobConf(HadoopRDD.scala:110) at org.apache.spark.rdd.HadoopRDD$$anon$1.init(HadoopRDD.scala:154) at org.apache.spark.rdd.HadoopRDD.compute(HadoopRDD.scala:149) at org.apache.spark.rdd.HadoopRDD.compute(HadoopRDD.scala:64) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:241) at org.apache.spark.rdd.RDD.iterator(RDD.scala:232) at org.apache.spark.rdd.MappedRDD.compute(MappedRDD.scala:31) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:241) at org.apache.spark.rdd.RDD.iterator(RDD.scala:232) at org.apache.spark.rdd.UnionPartition.iterator(UnionRDD.scala:32) at org.apache.spark.rdd.UnionRDD.compute(UnionRDD.scala:72) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:241) at org.apache.spark.rdd.RDD.iterator(RDD.scala:232) at org.apache.spark.rdd.FlatMappedRDD.compute(FlatMappedRDD.scala:33) at 
org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:241) at org.apache.spark.rdd.RDD.iterator(RDD.scala:232) at org.apache.spark.rdd.MappedRDD.compute(MappedRDD.scala:31) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:241) at org.apache.spark.rdd.RDD.iterator(RDD.scala:232) at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:161) at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:102) at org.apache.spark.scheduler.Task.run(Task.scala:53) at org.apache.spark.executor.Executor$TaskRunner$$anonfun$run$1.apply$mcV$sp(Executor.scala:213) at org.apache.spark.deploy.SparkHadoopUtil.runAsUser(SparkHadoopUtil.scala:49) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:178) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) at java.lang.Thread.run(Thread.java:744) {noformat} -- This message was sent by Atlassian JIRA (v6.2#6252)
[jira] [Commented] (SPARK-1097) ConcurrentModificationException
[ https://issues.apache.org/jira/browse/SPARK-1097?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14020353#comment-14020353 ] Nishkam Ravi commented on SPARK-1097: - Have initiated a PR for a workaround in Spark as well (for developers using Hadoop versions earlier than 2.4.1): https://github.com/apache/spark/pull/1000 ConcurrentModificationException --- Key: SPARK-1097 URL: https://issues.apache.org/jira/browse/SPARK-1097 Project: Spark Issue Type: Bug Components: Spark Core Affects Versions: 0.9.0 Reporter: Fabrizio Milo Attachments: nravi_Conf_Spark-1388.patch {noformat} 14/02/16 08:18:45 WARN TaskSetManager: Loss was due to java.util.ConcurrentModificationException java.util.ConcurrentModificationException at java.util.HashMap$HashIterator.nextEntry(HashMap.java:926) at java.util.HashMap$KeyIterator.next(HashMap.java:960) at java.util.AbstractCollection.addAll(AbstractCollection.java:341) at java.util.HashSet.init(HashSet.java:117) at org.apache.hadoop.conf.Configuration.init(Configuration.java:554) at org.apache.hadoop.mapred.JobConf.init(JobConf.java:439) at org.apache.spark.rdd.HadoopRDD.getJobConf(HadoopRDD.scala:110) at org.apache.spark.rdd.HadoopRDD$$anon$1.init(HadoopRDD.scala:154) at org.apache.spark.rdd.HadoopRDD.compute(HadoopRDD.scala:149) at org.apache.spark.rdd.HadoopRDD.compute(HadoopRDD.scala:64) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:241) at org.apache.spark.rdd.RDD.iterator(RDD.scala:232) at org.apache.spark.rdd.MappedRDD.compute(MappedRDD.scala:31) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:241) at org.apache.spark.rdd.RDD.iterator(RDD.scala:232) at org.apache.spark.rdd.UnionPartition.iterator(UnionRDD.scala:32) at org.apache.spark.rdd.UnionRDD.compute(UnionRDD.scala:72) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:241) at org.apache.spark.rdd.RDD.iterator(RDD.scala:232) at org.apache.spark.rdd.FlatMappedRDD.compute(FlatMappedRDD.scala:33) at 
org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:241) at org.apache.spark.rdd.RDD.iterator(RDD.scala:232) at org.apache.spark.rdd.MappedRDD.compute(MappedRDD.scala:31) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:241) at org.apache.spark.rdd.RDD.iterator(RDD.scala:232) at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:161) at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:102) at org.apache.spark.scheduler.Task.run(Task.scala:53) at org.apache.spark.executor.Executor$TaskRunner$$anonfun$run$1.apply$mcV$sp(Executor.scala:213) at org.apache.spark.deploy.SparkHadoopUtil.runAsUser(SparkHadoopUtil.scala:49) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:178) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) at java.lang.Thread.run(Thread.java:744) {noformat} -- This message was sent by Atlassian JIRA (v6.2#6252)
[jira] [Commented] (SPARK-1097) ConcurrentModificationException
[ https://issues.apache.org/jira/browse/SPARK-1097?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=13958671#comment-13958671 ] Tsuyoshi OZAWA commented on SPARK-1097: --- A patch by Nishkam on HADOOP-10456 has already been reviewed and will be committed against Hadoop's trunk in a few days. ConcurrentModificationException --- Key: SPARK-1097 URL: https://issues.apache.org/jira/browse/SPARK-1097 Project: Spark Issue Type: Bug Components: Spark Core Affects Versions: 0.9.0 Reporter: Fabrizio Milo Attachments: nravi_Conf_Spark-1388.patch {noformat} 14/02/16 08:18:45 WARN TaskSetManager: Loss was due to java.util.ConcurrentModificationException java.util.ConcurrentModificationException at java.util.HashMap$HashIterator.nextEntry(HashMap.java:926) at java.util.HashMap$KeyIterator.next(HashMap.java:960) at java.util.AbstractCollection.addAll(AbstractCollection.java:341) at java.util.HashSet.init(HashSet.java:117) at org.apache.hadoop.conf.Configuration.init(Configuration.java:554) at org.apache.hadoop.mapred.JobConf.init(JobConf.java:439) at org.apache.spark.rdd.HadoopRDD.getJobConf(HadoopRDD.scala:110) at org.apache.spark.rdd.HadoopRDD$$anon$1.init(HadoopRDD.scala:154) at org.apache.spark.rdd.HadoopRDD.compute(HadoopRDD.scala:149) at org.apache.spark.rdd.HadoopRDD.compute(HadoopRDD.scala:64) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:241) at org.apache.spark.rdd.RDD.iterator(RDD.scala:232) at org.apache.spark.rdd.MappedRDD.compute(MappedRDD.scala:31) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:241) at org.apache.spark.rdd.RDD.iterator(RDD.scala:232) at org.apache.spark.rdd.UnionPartition.iterator(UnionRDD.scala:32) at org.apache.spark.rdd.UnionRDD.compute(UnionRDD.scala:72) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:241) at org.apache.spark.rdd.RDD.iterator(RDD.scala:232) at org.apache.spark.rdd.FlatMappedRDD.compute(FlatMappedRDD.scala:33) at 
org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:241) at org.apache.spark.rdd.RDD.iterator(RDD.scala:232) at org.apache.spark.rdd.MappedRDD.compute(MappedRDD.scala:31) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:241) at org.apache.spark.rdd.RDD.iterator(RDD.scala:232) at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:161) at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:102) at org.apache.spark.scheduler.Task.run(Task.scala:53) at org.apache.spark.executor.Executor$TaskRunner$$anonfun$run$1.apply$mcV$sp(Executor.scala:213) at org.apache.spark.deploy.SparkHadoopUtil.runAsUser(SparkHadoopUtil.scala:49) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:178) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) at java.lang.Thread.run(Thread.java:744) {noformat} -- This message was sent by Atlassian JIRA (v6.2#6252)
[jira] [Commented] (SPARK-1097) ConcurrentModificationException
[ https://issues.apache.org/jira/browse/SPARK-1097?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=13957958#comment-13957958 ] Nishkam Ravi commented on SPARK-1097: - We can consider putting a workaround in Spark as well (for non-CDH users that may be running an older version of Hadoop and not updating it periodically). For now, this fix needs to go upstream, so we can backport it to CDH. The CDH-Spark bundle would then inherit this fix. The same issue has been noted in HADOOP-10456 as well. ConcurrentModificationException --- Key: SPARK-1097 URL: https://issues.apache.org/jira/browse/SPARK-1097 Project: Spark Issue Type: Bug Components: Spark Core Affects Versions: 0.9.0 Reporter: Fabrizio Milo Attachments: nravi_Conf_Spark-1388.patch {noformat} 14/02/16 08:18:45 WARN TaskSetManager: Loss was due to java.util.ConcurrentModificationException java.util.ConcurrentModificationException at java.util.HashMap$HashIterator.nextEntry(HashMap.java:926) at java.util.HashMap$KeyIterator.next(HashMap.java:960) at java.util.AbstractCollection.addAll(AbstractCollection.java:341) at java.util.HashSet.init(HashSet.java:117) at org.apache.hadoop.conf.Configuration.init(Configuration.java:554) at org.apache.hadoop.mapred.JobConf.init(JobConf.java:439) at org.apache.spark.rdd.HadoopRDD.getJobConf(HadoopRDD.scala:110) at org.apache.spark.rdd.HadoopRDD$$anon$1.init(HadoopRDD.scala:154) at org.apache.spark.rdd.HadoopRDD.compute(HadoopRDD.scala:149) at org.apache.spark.rdd.HadoopRDD.compute(HadoopRDD.scala:64) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:241) at org.apache.spark.rdd.RDD.iterator(RDD.scala:232) at org.apache.spark.rdd.MappedRDD.compute(MappedRDD.scala:31) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:241) at org.apache.spark.rdd.RDD.iterator(RDD.scala:232) at org.apache.spark.rdd.UnionPartition.iterator(UnionRDD.scala:32) at org.apache.spark.rdd.UnionRDD.compute(UnionRDD.scala:72) at 
org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:241) at org.apache.spark.rdd.RDD.iterator(RDD.scala:232) at org.apache.spark.rdd.FlatMappedRDD.compute(FlatMappedRDD.scala:33) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:241) at org.apache.spark.rdd.RDD.iterator(RDD.scala:232) at org.apache.spark.rdd.MappedRDD.compute(MappedRDD.scala:31) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:241) at org.apache.spark.rdd.RDD.iterator(RDD.scala:232) at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:161) at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:102) at org.apache.spark.scheduler.Task.run(Task.scala:53) at org.apache.spark.executor.Executor$TaskRunner$$anonfun$run$1.apply$mcV$sp(Executor.scala:213) at org.apache.spark.deploy.SparkHadoopUtil.runAsUser(SparkHadoopUtil.scala:49) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:178) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) at java.lang.Thread.run(Thread.java:744) {noformat} -- This message was sent by Atlassian JIRA (v6.2#6252)
[jira] [Commented] (SPARK-1097) ConcurrentModificationException
[ https://issues.apache.org/jira/browse/SPARK-1097?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanelfocusedCommentId=13957233#comment-13957233 ] Nishkam Ravi commented on SPARK-1097: - Attached is a patch for this issue. Verified with mvn test/compile/install. ConcurrentModificationException --- Key: SPARK-1097 URL: https://issues.apache.org/jira/browse/SPARK-1097 Project: Spark Issue Type: Bug Components: Spark Core Affects Versions: 0.9.0 Reporter: Fabrizio Milo Attachments: nravi_Conf_Spark-1388.patch {noformat} 14/02/16 08:18:45 WARN TaskSetManager: Loss was due to java.util.ConcurrentModificationException java.util.ConcurrentModificationException at java.util.HashMap$HashIterator.nextEntry(HashMap.java:926) at java.util.HashMap$KeyIterator.next(HashMap.java:960) at java.util.AbstractCollection.addAll(AbstractCollection.java:341) at java.util.HashSet.init(HashSet.java:117) at org.apache.hadoop.conf.Configuration.init(Configuration.java:554) at org.apache.hadoop.mapred.JobConf.init(JobConf.java:439) at org.apache.spark.rdd.HadoopRDD.getJobConf(HadoopRDD.scala:110) at org.apache.spark.rdd.HadoopRDD$$anon$1.init(HadoopRDD.scala:154) at org.apache.spark.rdd.HadoopRDD.compute(HadoopRDD.scala:149) at org.apache.spark.rdd.HadoopRDD.compute(HadoopRDD.scala:64) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:241) at org.apache.spark.rdd.RDD.iterator(RDD.scala:232) at org.apache.spark.rdd.MappedRDD.compute(MappedRDD.scala:31) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:241) at org.apache.spark.rdd.RDD.iterator(RDD.scala:232) at org.apache.spark.rdd.UnionPartition.iterator(UnionRDD.scala:32) at org.apache.spark.rdd.UnionRDD.compute(UnionRDD.scala:72) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:241) at org.apache.spark.rdd.RDD.iterator(RDD.scala:232) at org.apache.spark.rdd.FlatMappedRDD.compute(FlatMappedRDD.scala:33) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:241) at 
org.apache.spark.rdd.RDD.iterator(RDD.scala:232) at org.apache.spark.rdd.MappedRDD.compute(MappedRDD.scala:31) at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:241) at org.apache.spark.rdd.RDD.iterator(RDD.scala:232) at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:161) at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:102) at org.apache.spark.scheduler.Task.run(Task.scala:53) at org.apache.spark.executor.Executor$TaskRunner$$anonfun$run$1.apply$mcV$sp(Executor.scala:213) at org.apache.spark.deploy.SparkHadoopUtil.runAsUser(SparkHadoopUtil.scala:49) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:178) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) at java.lang.Thread.run(Thread.java:744) {noformat} -- This message was sent by Atlassian JIRA (v6.2#6252)