[ 
https://issues.apache.org/jira/browse/SPARK-1867?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14987190#comment-14987190
 ] 

melvin mendoza commented on SPARK-1867:
---------------------------------------

[~srowen] having problem with spark

java.lang.IllegalStateException: unread block data
        at 
java.io.ObjectInputStream$BlockDataInputStream.setBlockDataMode(ObjectInputStream.java:2421)
        at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1382)
        at 
java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:1990)
        at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1915)
        at 
java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1798)
        at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1350)
        at java.io.ObjectInputStream.readObject(ObjectInputStream.java:370)
        at 
org.apache.spark.serializer.JavaDeserializationStream.readObject(JavaSerializer.scala:68)
        at 
org.apache.spark.serializer.JavaSerializerInstance.deserialize(JavaSerializer.scala:94)
        at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:185)
        at 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
        at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
        at java.lang.Thread.run(Thread.java:745)

Code snippet: 
  def main(args: Array[String]) {
    val LOG = Logger.getLogger(this.getClass().getName() + "Testing")
    LOG.info("SAMPLE START")
    LOG.info("Testing")

    try {
      val conf = new SparkConf()
      val sc = new SparkContext(conf)
      val phoenixSpark = sc.phoenixTableAsRDD(
        "SAMPLE_TABLE",
        Seq("ID", "NAME"),
        zkUrl = Some("r3r31gateway.clustered.com:2181:/hbase-unsecure"))

      val name = phoenixSpark.map(f => f.toString())
      val sample = phoenixSpark.map(f => (f.get("ID") + "," + f.get("NAME")))

      sample.foreach(println)
      LOG.info("SAMPLE TABLE: " + name.toString())
      sc.stop()

    } catch {
      case e: Exception => {
        e.printStackTrace()
        val msg = e.getMessage
        LOG.error("Phoenix Testing failure: errorMsg: " + msg)
      }
    }
  }


> Spark Documentation Error causes java.lang.IllegalStateException: unread 
> block data
> -----------------------------------------------------------------------------------
>
>                 Key: SPARK-1867
>                 URL: https://issues.apache.org/jira/browse/SPARK-1867
>             Project: Spark
>          Issue Type: Bug
>          Components: Spark Core
>            Reporter: sam
>
> I've employed two System Administrators on a contract basis (for quite a bit 
> of money), and both contractors have independently hit the following 
> exception.  What we are doing is:
> 1. Installing Spark 0.9.1 according to the documentation on the website, 
> along with CDH4 (and another cluster with CDH5) distros of hadoop/hdfs.
> 2. Building a fat jar with a Spark app with sbt then trying to run it on the 
> cluster
> I've also included code snippets, and sbt deps at the bottom.
> When I've Googled this, there seems to be two somewhat vague responses:
> a) Mismatching spark versions on nodes/user code
> b) Need to add more jars to the SparkConf
> Now I know that (b) is not the problem having successfully run the same code 
> on other clusters while only including one jar (it's a fat jar).
> But I have no idea how to check for (a) - it appears Spark doesn't have any 
> version checks or anything - it would be nice if it checked versions and 
> threw a "mismatching version exception: you have user code using version X 
> and node Y has version Z".
> I would be very grateful for advice on this.
> The exception:
> Exception in thread "main" org.apache.spark.SparkException: Job aborted: Task 
> 0.0:1 failed 32 times (most recent failure: Exception failure: 
> java.lang.IllegalStateException: unread block data)
>       at 
> org.apache.spark.scheduler.DAGScheduler$$anonfun$org$apache$spark$scheduler$DAGScheduler$$abortStage$1.apply(DAGScheduler.scala:1020)
>       at 
> org.apache.spark.scheduler.DAGScheduler$$anonfun$org$apache$spark$scheduler$DAGScheduler$$abortStage$1.apply(DAGScheduler.scala:1018)
>       at 
> scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
>       at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
>       at 
> org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$abortStage(DAGScheduler.scala:1018)
>       at 
> org.apache.spark.scheduler.DAGScheduler$$anonfun$processEvent$10.apply(DAGScheduler.scala:604)
>       at 
> org.apache.spark.scheduler.DAGScheduler$$anonfun$processEvent$10.apply(DAGScheduler.scala:604)
>       at scala.Option.foreach(Option.scala:236)
>       at 
> org.apache.spark.scheduler.DAGScheduler.processEvent(DAGScheduler.scala:604)
>       at 
> org.apache.spark.scheduler.DAGScheduler$$anonfun$start$1$$anon$2$$anonfun$receive$1.applyOrElse(DAGScheduler.scala:190)
>       at akka.actor.ActorCell.receiveMessage(ActorCell.scala:498)
>       at akka.actor.ActorCell.invoke(ActorCell.scala:456)
>       at akka.dispatch.Mailbox.processMailbox(Mailbox.scala:237)
>       at akka.dispatch.Mailbox.run(Mailbox.scala:219)
>       at 
> akka.dispatch.ForkJoinExecutorConfigurator$AkkaForkJoinTask.exec(AbstractDispatcher.scala:386)
>       at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260)
>       at 
> scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339)
>       at 
> scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979)
>       at 
> scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107)
> 14/05/16 18:05:31 INFO scheduler.TaskSetManager: Loss was due to 
> java.lang.IllegalStateException: unread block data [duplicate 59]
> My code snippet:
> val conf = new SparkConf()
>                .setMaster(clusterMaster)
>                .setAppName(appName)
>                .setSparkHome(sparkHome)
>                .setJars(SparkContext.jarOfClass(this.getClass))
> println("count = " + new SparkContext(conf).textFile(someHdfsPath).count())
> My SBT dependencies:
> // relevant
> "org.apache.spark" % "spark-core_2.10" % "0.9.1",
> "org.apache.hadoop" % "hadoop-client" % "2.3.0-mr1-cdh5.0.0",
> // standard, probably unrelated
> "com.github.seratch" %% "awscala" % "[0.2,)",
> "org.scalacheck" %% "scalacheck" % "1.10.1" % "test",
> "org.specs2" %% "specs2" % "1.14" % "test",
> "org.scala-lang" % "scala-reflect" % "2.10.3",
> "org.scalaz" %% "scalaz-core" % "7.0.5",
> "net.minidev" % "json-smart" % "1.2"



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org
For additional commands, e-mail: issues-h...@spark.apache.org

Reply via email to