Joseph K. Bradley created SPARK-15488: -----------------------------------------
Summary: Possible Accumulator bug causing OneVsRestSuite to be flaky Key: SPARK-15488 URL: https://issues.apache.org/jira/browse/SPARK-15488 Project: Spark Issue Type: Bug Components: ML, Spark Core Affects Versions: 2.0.0 Environment: Jenkins: branch-2.0, maven build, Hadoop 2.6 Reporter: Joseph K. Bradley OneVsRestSuite has been slightly flaky recently. The failure happens in the use of {{Range.par}}, which runs concurrent jobs that all use the same DataFrame. This sometimes fails with a {{java.util.ConcurrentModificationException}}. The failure appears to come from {{InMemoryRelation.batchStats}} being accessed. Since that is an instance of {{Accumulable}}, I'm guessing the bug is from recent Accumulator changes. The stack trace below is from this test run: * links: [https://spark-tests.appspot.com/test-logs/125719479] and [https://spark-tests.appspot.com/builds/spark-master-test-maven-hadoop-2.6/993] {code} java.util.ConcurrentModificationException at java.util.ArrayList$Itr.checkForComodification(ArrayList.java:901) at java.util.ArrayList$Itr.next(ArrayList.java:851) at java.util.Collections$UnmodifiableCollection$1.next(Collections.java:1042) at scala.collection.convert.Wrappers$JIteratorWrapper.next(Wrappers.scala:43) at scala.collection.Iterator$class.foreach(Iterator.scala:893) at scala.collection.AbstractIterator.foreach(Iterator.scala:1336) at scala.collection.IterableLike$class.foreach(IterableLike.scala:72) at scala.collection.AbstractIterable.foreach(Iterable.scala:54) at scala.collection.TraversableLike$class.map(TraversableLike.scala:234) at scala.collection.AbstractTraversable.map(Traversable.scala:104) at org.apache.spark.sql.execution.columnar.InMemoryRelation.computeSizeInBytes(InMemoryTableScanExec.scala:90) at org.apache.spark.sql.execution.columnar.InMemoryRelation.statistics(InMemoryTableScanExec.scala:113) at org.apache.spark.sql.execution.columnar.InMemoryRelation.statisticsToBePropagated(InMemoryTableScanExec.scala:97) at 
org.apache.spark.sql.execution.columnar.InMemoryRelation.withOutput(InMemoryTableScanExec.scala:191) at org.apache.spark.sql.execution.CacheManager$$anonfun$useCachedData$1$$anonfun$applyOrElse$1.apply(CacheManager.scala:144) at org.apache.spark.sql.execution.CacheManager$$anonfun$useCachedData$1$$anonfun$applyOrElse$1.apply(CacheManager.scala:144) at scala.Option.map(Option.scala:146) at org.apache.spark.sql.execution.CacheManager$$anonfun$useCachedData$1.applyOrElse(CacheManager.scala:144) at org.apache.spark.sql.execution.CacheManager$$anonfun$useCachedData$1.applyOrElse(CacheManager.scala:141) at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$3.apply(TreeNode.scala:265) at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$3.apply(TreeNode.scala:265) at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:68) at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:264) at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:270) at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:270) at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$5.apply(TreeNode.scala:307) at scala.collection.Iterator$$anon$11.next(Iterator.scala:409) at scala.collection.Iterator$class.foreach(Iterator.scala:893) at scala.collection.AbstractIterator.foreach(Iterator.scala:1336) at scala.collection.generic.Growable$class.$plus$plus$eq(Growable.scala:59) at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:104) at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:48) at scala.collection.TraversableOnce$class.to(TraversableOnce.scala:310) at scala.collection.AbstractIterator.to(Iterator.scala:1336) at scala.collection.TraversableOnce$class.toBuffer(TraversableOnce.scala:302) at scala.collection.AbstractIterator.toBuffer(Iterator.scala:1336) at scala.collection.TraversableOnce$class.toArray(TraversableOnce.scala:289) 
at scala.collection.AbstractIterator.toArray(Iterator.scala:1336) at org.apache.spark.sql.catalyst.trees.TreeNode.transformChildren(TreeNode.scala:356) at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:270) at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:270) at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:270) at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$5.apply(TreeNode.scala:307) at scala.collection.Iterator$$anon$11.next(Iterator.scala:409) at scala.collection.Iterator$class.foreach(Iterator.scala:893) at scala.collection.AbstractIterator.foreach(Iterator.scala:1336) at scala.collection.generic.Growable$class.$plus$plus$eq(Growable.scala:59) at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:104) at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:48) at scala.collection.TraversableOnce$class.to(TraversableOnce.scala:310) at scala.collection.AbstractIterator.to(Iterator.scala:1336) at scala.collection.TraversableOnce$class.toBuffer(TraversableOnce.scala:302) at scala.collection.AbstractIterator.toBuffer(Iterator.scala:1336) at scala.collection.TraversableOnce$class.toArray(TraversableOnce.scala:289) at scala.collection.AbstractIterator.toArray(Iterator.scala:1336) at org.apache.spark.sql.catalyst.trees.TreeNode.transformChildren(TreeNode.scala:356) at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:270) at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:270) at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:270) at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$5.apply(TreeNode.scala:307) at scala.collection.Iterator$$anon$11.next(Iterator.scala:409) at scala.collection.Iterator$class.foreach(Iterator.scala:893) at 
scala.collection.AbstractIterator.foreach(Iterator.scala:1336) at scala.collection.generic.Growable$class.$plus$plus$eq(Growable.scala:59) at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:104) at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:48) at scala.collection.TraversableOnce$class.to(TraversableOnce.scala:310) at scala.collection.AbstractIterator.to(Iterator.scala:1336) at scala.collection.TraversableOnce$class.toBuffer(TraversableOnce.scala:302) at scala.collection.AbstractIterator.toBuffer(Iterator.scala:1336) at scala.collection.TraversableOnce$class.toArray(TraversableOnce.scala:289) at scala.collection.AbstractIterator.toArray(Iterator.scala:1336) at org.apache.spark.sql.catalyst.trees.TreeNode.transformChildren(TreeNode.scala:356) at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:270) at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:270) at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:270) at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$5.apply(TreeNode.scala:307) at scala.collection.Iterator$$anon$11.next(Iterator.scala:409) at scala.collection.Iterator$class.foreach(Iterator.scala:893) at scala.collection.AbstractIterator.foreach(Iterator.scala:1336) at scala.collection.generic.Growable$class.$plus$plus$eq(Growable.scala:59) at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:104) at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:48) at scala.collection.TraversableOnce$class.to(TraversableOnce.scala:310) at scala.collection.AbstractIterator.to(Iterator.scala:1336) at scala.collection.TraversableOnce$class.toBuffer(TraversableOnce.scala:302) at scala.collection.AbstractIterator.toBuffer(Iterator.scala:1336) at scala.collection.TraversableOnce$class.toArray(TraversableOnce.scala:289) at 
scala.collection.AbstractIterator.toArray(Iterator.scala:1336) at org.apache.spark.sql.catalyst.trees.TreeNode.transformChildren(TreeNode.scala:356) at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:270) at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:270) at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:270) at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$5.apply(TreeNode.scala:307) at scala.collection.Iterator$$anon$11.next(Iterator.scala:409) at scala.collection.Iterator$class.foreach(Iterator.scala:893) at scala.collection.AbstractIterator.foreach(Iterator.scala:1336) at scala.collection.generic.Growable$class.$plus$plus$eq(Growable.scala:59) at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:104) at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:48) at scala.collection.TraversableOnce$class.to(TraversableOnce.scala:310) at scala.collection.AbstractIterator.to(Iterator.scala:1336) at scala.collection.TraversableOnce$class.toBuffer(TraversableOnce.scala:302) at scala.collection.AbstractIterator.toBuffer(Iterator.scala:1336) at scala.collection.TraversableOnce$class.toArray(TraversableOnce.scala:289) at scala.collection.AbstractIterator.toArray(Iterator.scala:1336) at org.apache.spark.sql.catalyst.trees.TreeNode.transformChildren(TreeNode.scala:356) at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:270) at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:270) at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformDown$1.apply(TreeNode.scala:270) at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$5.apply(TreeNode.scala:307) at scala.collection.Iterator$$anon$11.next(Iterator.scala:409) at scala.collection.Iterator$class.foreach(Iterator.scala:893) at scala.collection.AbstractIterator.foreach(Iterator.scala:1336) 
at scala.collection.generic.Growable$class.$plus$plus$eq(Growable.scala:59) at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:104) at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:48) at scala.collection.TraversableOnce$class.to(TraversableOnce.scala:310) at scala.collection.AbstractIterator.to(Iterator.scala:1336) at scala.collection.TraversableOnce$class.toBuffer(TraversableOnce.scala:302) at scala.collection.AbstractIterator.toBuffer(Iterator.scala:1336) at scala.collection.TraversableOnce$class.toArray(TraversableOnce.scala:289) at scala.collection.AbstractIterator.toArray(Iterator.scala:1336) at org.apache.spark.sql.catalyst.trees.TreeNode.transformChildren(TreeNode.scala:356) at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:270) at org.apache.spark.sql.execution.CacheManager.useCachedData(CacheManager.scala:141) at org.apache.spark.sql.execution.QueryExecution.withCachedData$lzycompute(QueryExecution.scala:70) at org.apache.spark.sql.execution.QueryExecution.withCachedData(QueryExecution.scala:67) at org.apache.spark.sql.execution.QueryExecution.optimizedPlan$lzycompute(QueryExecution.scala:73) at org.apache.spark.sql.execution.QueryExecution.optimizedPlan(QueryExecution.scala:73) at org.apache.spark.sql.execution.QueryExecution.sparkPlan$lzycompute(QueryExecution.scala:77) at org.apache.spark.sql.execution.QueryExecution.sparkPlan(QueryExecution.scala:75) at org.apache.spark.sql.execution.QueryExecution.executedPlan$lzycompute(QueryExecution.scala:82) at org.apache.spark.sql.execution.QueryExecution.executedPlan(QueryExecution.scala:82) at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:85) at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:85) at org.apache.spark.sql.Dataset.rdd$lzycompute(Dataset.scala:2286) at org.apache.spark.sql.Dataset.rdd(Dataset.scala:2283) at 
org.apache.spark.ml.classification.BinaryLogisticRegressionSummary.<init>(LogisticRegression.scala:850) at org.apache.spark.ml.classification.BinaryLogisticRegressionTrainingSummary.<init>(LogisticRegression.scala:817) at org.apache.spark.ml.classification.LogisticRegression.train(LogisticRegression.scala:449) at org.apache.spark.ml.classification.LogisticRegression.train(LogisticRegression.scala:262) at org.apache.spark.ml.classification.LogisticRegression.train(LogisticRegression.scala:162) at org.apache.spark.ml.Predictor.fit(Predictor.scala:90) at org.apache.spark.ml.Predictor.fit(Predictor.scala:71) at org.apache.spark.ml.Estimator.fit(Estimator.scala:61) at org.apache.spark.ml.classification.OneVsRest$$anonfun$14.apply(OneVsRest.scala:336) at org.apache.spark.ml.classification.OneVsRest$$anonfun$14.apply(OneVsRest.scala:325) at scala.collection.parallel.immutable.ParRange$ParRangeIterator.map2combiner(ParRange.scala:105) at scala.collection.parallel.ParIterableLike$Map.leaf(ParIterableLike.scala:1054) at scala.collection.parallel.Task$$anonfun$tryLeaf$1.apply$mcV$sp(Tasks.scala:49) at scala.collection.parallel.Task$$anonfun$tryLeaf$1.apply(Tasks.scala:48) at scala.collection.parallel.Task$$anonfun$tryLeaf$1.apply(Tasks.scala:48) at scala.collection.parallel.Task$class.tryLeaf(Tasks.scala:51) at scala.collection.parallel.ParIterableLike$Map.tryLeaf(ParIterableLike.scala:1051) at scala.collection.parallel.AdaptiveWorkStealingTasks$WrappedTask$class.internal(Tasks.scala:159) at scala.collection.parallel.AdaptiveWorkStealingForkJoinTasks$WrappedTask.internal(Tasks.scala:443) at scala.collection.parallel.AdaptiveWorkStealingTasks$WrappedTask$class.compute(Tasks.scala:149) at scala.collection.parallel.AdaptiveWorkStealingForkJoinTasks$WrappedTask.compute(Tasks.scala:443) at scala.concurrent.forkjoin.RecursiveAction.exec(RecursiveAction.java:160) at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260) at 
scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339) at scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979) at scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107) {code} -- This message was sent by Atlassian JIRA (v6.3.4#6332) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org