[ https://issues.apache.org/jira/browse/FLINK-6662?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16021589#comment-16021589 ]
ASF GitHub Bot commented on FLINK-6662: --------------------------------------- Github user tillrohrmann commented on a diff in the pull request: https://github.com/apache/flink/pull/3972#discussion_r118068470 --- Diff: flink-runtime/src/main/java/org/apache/flink/runtime/checkpoint/ZooKeeperCompletedCheckpointStore.java --- @@ -376,8 +377,14 @@ private static CompletedCheckpoint retrieveCompletedCheckpoint(Tuple2<Retrievabl try { return stateHandlePath.f0.retrieveState(); - } catch (Exception e) { - throw new FlinkException("Could not retrieve checkpoint " + checkpointId + ". The state handle seems to be broken.", e); + } catch (ClassNotFoundException cnfe) { + throw new FlinkException("Could not retrieve checkpoint " + checkpointId + " from state handle under " + + stateHandlePath.f1 + ". This indicates that you are trying to recover from state written by an " + + "older Flink version which is not compatible. Try cleaning the state handle store.", cnfe); + } catch (IOException ioe) { + throw new FlinkException("Could not retrieve " + checkpointId + " worker from state handle under " + --- End diff -- Yes, a copy & paste error from my side. I will correct it. > ClassNotFoundException: o.a.f.r.j.t.JobSnapshottingSettings recovering job > -------------------------------------------------------------------------- > > Key: FLINK-6662 > URL: https://issues.apache.org/jira/browse/FLINK-6662 > Project: Flink > Issue Type: Bug > Components: JobManager, Mesos, State Backends, Checkpointing > Affects Versions: 1.3.0 > Reporter: Jared Stehler > Assignee: Till Rohrmann > > Running flink mesos on 1.3-release branch, I'm seeing the following error on > appmaster startup: > {noformat} > 2017-05-22 15:32:45.946 [flink-akka.actor.default-dispatcher-17] WARN > o.a.flink.mesos.runtime.clusterframework.MesosJobManager - Failed to recover > job 088027410f1a628e7dfc59dc23df3ded. > java.lang.Exception: Failed to retrieve the submitted job graph from state > handle. > at > org.apache.flink.runtime.jobmanager.ZooKeeperSubmittedJobGraphStore.recoverJobGraph(ZooKeeperSubmittedJobGraphStore.java:186) > at > org.apache.flink.runtime.jobmanager.JobManager$$anonfun$handleMessage$1$$anonfun$applyOrElse$1$$anonfun$apply$mcV$sp$1.apply$mcV$sp(JobManager.scala:536) > at > org.apache.flink.runtime.jobmanager.JobManager$$anonfun$handleMessage$1$$anonfun$applyOrElse$1$$anonfun$apply$mcV$sp$1.apply(JobManager.scala:533) > at > org.apache.flink.runtime.jobmanager.JobManager$$anonfun$handleMessage$1$$anonfun$applyOrElse$1$$anonfun$apply$mcV$sp$1.apply(JobManager.scala:533) > at scala.util.DynamicVariable.withValue(DynamicVariable.scala:58) > at > org.apache.flink.runtime.jobmanager.JobManager$$anonfun$handleMessage$1$$anonfun$applyOrElse$1.apply$mcV$sp(JobManager.scala:533) > at > org.apache.flink.runtime.jobmanager.JobManager$$anonfun$handleMessage$1$$anonfun$applyOrElse$1.apply(JobManager.scala:529) > at > org.apache.flink.runtime.jobmanager.JobManager$$anonfun$handleMessage$1$$anonfun$applyOrElse$1.apply(JobManager.scala:529) > at > scala.concurrent.impl.Future$PromiseCompletingRunnable.liftedTree1$1(Future.scala:24) > at > scala.concurrent.impl.Future$PromiseCompletingRunnable.run(Future.scala:24) > at akka.dispatch.TaskInvocation.run(AbstractDispatcher.scala:40) > at > akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.scala:397) > at > scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260) > at > scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339) > at > scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979) > at > scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107) > Caused by: java.lang.ClassNotFoundException: > org.apache.flink.runtime.jobgraph.tasks.JobSnapshottingSettings > at java.net.URLClassLoader.findClass(URLClassLoader.java:381) > at java.lang.ClassLoader.loadClass(ClassLoader.java:424) > at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:335) > at java.lang.ClassLoader.loadClass(ClassLoader.java:357) > at java.lang.Class.forName0(Native Method) > at java.lang.Class.forName(Class.java:348) > at > org.apache.flink.util.InstantiationUtil$ClassLoaderObjectInputStream.resolveClass(InstantiationUtil.java:64) > at > java.io.ObjectInputStream.readNonProxyDesc(ObjectInputStream.java:1826) > at > java.io.ObjectInputStream.readClassDesc(ObjectInputStream.java:1713) > at > java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2000) > at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1535) > at > java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2245) > at > java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2169) > at > java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2027) > at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1535) > at > java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2245) > at > java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2169) > at > java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2027) > at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1535) > at java.io.ObjectInputStream.readObject(ObjectInputStream.java:422) > at > org.apache.flink.util.InstantiationUtil.deserializeObject(InstantiationUtil.java:305) > at > org.apache.flink.runtime.state.RetrievableStreamStateHandle.retrieveState(RetrievableStreamStateHandle.java:58) > at > org.apache.flink.runtime.jobmanager.ZooKeeperSubmittedJobGraphStore.recoverJobGraph(ZooKeeperSubmittedJobGraphStore.java:184) > ... 15 common frames omitted > {noformat} -- This message was sent by Atlassian JIRA (v6.3.15#6346)