Hello,

I am seeing two strange effects when working with spark-shell:


1. The following code causes the exception below when executed in spark-shell. At the same time, it works perfectly when submitted with spark-submit:

import org.apache.hadoop.hbase.{HConstants, HBaseConfiguration}
import org.apache.hadoop.hbase.mapreduce.TableInputFormat
import org.apache.hadoop.hbase.io.ImmutableBytesWritable
import org.apache.hadoop.hbase.client.Result
import org.apache.mahout.math.VectorWritable
import com.google.common.io.ByteStreams
import org.apache.hadoop.hbase.util.Bytes
import org.apache.spark.SparkContext.rddToPairRDDFunctions
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

val hConf = HBaseConfiguration.create()
hConf.set("hbase.defaults.for.version.skip", "true")
hConf.set("hbase.defaults.for.version", "0.98.6-cdh5.2.0")
hConf.set(HConstants.ZOOKEEPER_QUORUM, "myserv")
hConf.set(HConstants.ZOOKEEPER_CLIENT_PORT, "2181")
hConf.set(TableInputFormat.INPUT_TABLE, "MyNS:MyTable")
val rdd = sc.newAPIHadoopRDD(hConf, classOf[TableInputFormat], classOf[ImmutableBytesWritable], classOf[Result])
rdd.count()

--- Exception ---

14/12/01 10:45:24 ERROR ExecutorUncaughtExceptionHandler: Uncaught exception in thread Thread[Executor task launch worker-0,5,main]
 java.lang.ExceptionInInitializerError
    at org.apache.hadoop.hbase.client.HTable.<init>(HTable.java:197)
    at org.apache.hadoop.hbase.client.HTable.<init>(HTable.java:159)
    at org.apache.hadoop.hbase.mapreduce.TableInputFormat.setConf(TableInputFormat.java:101)
    at org.apache.spark.rdd.NewHadoopRDD$$anon$1.<init>(NewHadoopRDD.scala:113)
    at org.apache.spark.rdd.NewHadoopRDD.compute(NewHadoopRDD.scala:104)
    at org.apache.spark.rdd.NewHadoopRDD.compute(NewHadoopRDD.scala:66)
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:262)
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:229)
    at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:62)
    at org.apache.spark.scheduler.Task.run(Task.scala:54)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:180)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
    at java.lang.Thread.run(Thread.java:745)
Caused by: java.lang.RuntimeException: hbase-default.xml file seems to be for and old version of HBase (null), this version is 0.98.6-cdh5.2.0
    at org.apache.hadoop.hbase.HBaseConfiguration.checkDefaultsVersion(HBaseConfiguration.java:73)
    at org.apache.hadoop.hbase.HBaseConfiguration.addHbaseResources(HBaseConfiguration.java:105)
    at org.apache.hadoop.hbase.HBaseConfiguration.create(HBaseConfiguration.java:116)
    at org.apache.hadoop.hbase.client.HConnectionManager.<clinit>(HConnectionManager.java:222)
    ... 14 more
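
Since the exception reports the defaults version as (null), our suspicion is that hbase-default.xml is not visible (or is shadowed) on the executors, where HConnectionManager's static initializer calls HBaseConfiguration.create() on its own. A quick check along these lines (just a sketch, run from the shell) should show what the executors actually see:

// sketch: ask a task which hbase-default.xml (if any) its classloader can find
sc.parallelize(Seq(1)).map { _ =>
  val url = Thread.currentThread().getContextClassLoader.getResource("hbase-default.xml")
  if (url == null) "hbase-default.xml NOT found on executor classpath" else url.toString
}.collect().foreach(println)

As far as we can tell from the trace, that static initializer builds a fresh configuration from the classpath, which would mean the skip flag we set on hConf cannot reach it anyway.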

We have already checked most of the trivial stuff: class paths, the existence of the table and its column families, the HBase-specific settings that are supposed to skip the version check, and so on. It appears that the HBase configuration we supply is completely ignored by the context. We tried to work around this by instantiating our own SparkContext and ran into the second weird effect:

2. When attempting to instantiate our own SparkContext, we get the exception below:

// imports block
...

val conf = new SparkConf().setAppName("Simple Application")
val sc = new SparkContext(conf)

--- Exception ---

2014-12-01 10:42:24,966 WARN o.e.j.u.c.AbstractLifeCycle - FAILED SelectChannelConnector@0.0.0.0:4040: java.net.BindException: Die Adresse wird bereits verwendet (German for "Address already in use")
java.net.BindException: Die Adresse wird bereits verwendet
        at sun.nio.ch.Net.bind0(Native Method)
        at sun.nio.ch.Net.bind(Net.java:444)
        at sun.nio.ch.Net.bind(Net.java:436)
        at sun.nio.ch.ServerSocketChannelImpl.bind(ServerSocketChannelImpl.java:214)
        at sun.nio.ch.ServerSocketAdaptor.bind(ServerSocketAdaptor.java:74)
        at org.eclipse.jetty.server.nio.SelectChannelConnector.open(SelectChannelConnector.java:187)
        at org.eclipse.jetty.server.AbstractConnector.doStart(AbstractConnector.java:316)
        at org.eclipse.jetty.server.nio.SelectChannelConnector.doStart(SelectChannelConnector.java:265)
        at org.eclipse.jetty.util.component.AbstractLifeCycle.start(AbstractLifeCycle.java:64)
        at org.eclipse.jetty.server.Server.doStart(Server.java:293)
        at org.eclipse.jetty.util.component.AbstractLifeCycle.start(AbstractLifeCycle.java:64)
        at org.apache.spark.ui.JettyUtils$.org$apache$spark$ui$JettyUtils$$connect$1(JettyUtils.scala:199)
        at org.apache.spark.ui.JettyUtils$$anonfun$4.apply(JettyUtils.scala:209)
        at org.apache.spark.ui.JettyUtils$$anonfun$4.apply(JettyUtils.scala:209)
        at org.apache.spark.util.Utils$$anonfun$startServiceOnPort$1.apply$mcVI$sp(Utils.scala:1449)
        at scala.collection.immutable.Range.foreach$mVc$sp(Range.scala:141)
        at org.apache.spark.util.Utils$.startServiceOnPort(Utils.scala:1445)
        at org.apache.spark.ui.JettyUtils$.startJettyServer(JettyUtils.scala:209)
        at org.apache.spark.ui.WebUI.bind(WebUI.scala:102)
        at org.apache.spark.SparkContext.<init>(SparkContext.scala:224)
        at $line22.$read$$iwC$$iwC$$iwC$$iwC.<init>(<console>:24)
        at $line22.$read$$iwC$$iwC$$iwC.<init>(<console>:29)
        at $line22.$read$$iwC$$iwC.<init>(<console>:31)
        at $line22.$read$$iwC.<init>(<console>:33)
        at $line22.$read.<init>(<console>:35)
        at $line22.$read$.<init>(<console>:39)
        at $line22.$read$.<clinit>(<console>)
        at $line22.$eval$.<init>(<console>:7)
        at $line22.$eval$.<clinit>(<console>)
        at $line22.$eval.$print(<console>)
        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
        at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
        at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
        at java.lang.reflect.Method.invoke(Method.java:606)
        at org.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:846)
        at org.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1119)
        at org.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:672)
        at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:703)
        at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:667)
        at org.apache.spark.repl.SparkILoop.reallyInterpret$1(SparkILoop.scala:819)
        at org.apache.spark.repl.SparkILoop.interpretStartingWith(SparkILoop.scala:864)
        at org.apache.spark.repl.SparkILoop.command(SparkILoop.scala:776)
        at org.apache.spark.repl.SparkILoop.processLine$1(SparkILoop.scala:619)
        at org.apache.spark.repl.SparkILoop.innerLoop$1(SparkILoop.scala:627)
        at org.apache.spark.repl.SparkILoop.loop(SparkILoop.scala:632)
        at org.apache.spark.repl.SparkILoop$$anonfun$process$1.apply$mcZ$sp(SparkILoop.scala:959)
        at org.apache.spark.repl.SparkILoop$$anonfun$process$1.apply(SparkILoop.scala:907)
        at org.apache.spark.repl.SparkILoop$$anonfun$process$1.apply(SparkILoop.scala:907)
        at scala.tools.nsc.util.ScalaClassLoader$.savingContextLoader(ScalaClassLoader.scala:135)
        at org.apache.spark.repl.SparkILoop.process(SparkILoop.scala:907)
        at org.apache.spark.repl.SparkILoop.process(SparkILoop.scala:1002)
        at org.apache.spark.repl.Main$.main(Main.scala:31)
        at org.apache.spark.repl.Main.main(Main.scala)
        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
        at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
        at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
        at java.lang.reflect.Method.invoke(Method.java:606)
        at org.apache.spark.deploy.SparkSubmit$.launch(SparkSubmit.scala:331)
        at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:75)
        at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)

Has anyone encountered these issues already? I recall that earlier spark-shell versions had no problem with instantiating one's own SparkContext; is this new to Spark 1.1.0?
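
For what it's worth, the only workaround for issue 2 that comes to mind (just a sketch; it assumes the shell's pre-created sc is what already holds web UI port 4040, as the BindException suggests) is to stop that context before creating our own:

import org.apache.spark.{SparkConf, SparkContext}

sc.stop()  // release the shell's default context and its web UI port
val conf = new SparkConf().setAppName("Simple Application")
val sc = new SparkContext(conf)  // re-binds the name sc in the REPL

But even if that works, it would not explain why the HBase configuration in issue 1 is ignored.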

Thank you for your help and kind regards
reinis
