I am facing an issue while connecting Apache Spark to an Apache Cassandra datastore.


[root@bin]# ./spark-shell --jars ../jars/spark-cassandra-connector-assembly-2.0.3-36-g9a50162.jar
Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
17/07/23 23:12:56 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
17/07/23 23:13:01 WARN ObjectStore: Failed to get database global_temp, returning NoSuchObjectException
Spark context Web UI available at http://111.23.140.15:4040
Spark context available as 'sc' (master = spark://172.16.214.126:7077, app id = app-20170723231257-0008).
Spark session available as 'spark'.
Welcome to
      ____              __
     / __/__  ___ _____/ /__
    _\ \/ _ \/ _ `/ __/  '_/
   /___/ .__/\_,_/_/ /_/\_\   version 2.2.0
      /_/

Using Scala version 2.11.8 (Java HotSpot(TM) 64-Bit Server VM, Java 1.8.0_131)
Type in expressions to have them evaluated.
Type :help for more information.

scala> sc.stop

scala> import com.datastax.spark.connector._, org.apache.spark.SparkContext, org.apache.spark.SparkContext._, org.apache.spark.SparkConf
import com.datastax.spark.connector._
import org.apache.spark.SparkContext
import org.apache.spark.SparkContext._
import org.apache.spark.SparkConf

scala> val conf = new SparkConf(true).set("spark.cassandra.connection.host","172.16.214.41")
conf: org.apache.spark.SparkConf = org.apache.spark.SparkConf@7d0e43d6

scala> val sc = new SparkContext(conf)
sc: org.apache.spark.SparkContext = org.apache.spark.SparkContext@202b5293

scala> val test_spark_rdd = sc.cassandraTable("test_spark", "test")
test_spark_rdd: com.datastax.spark.connector.rdd.CassandraTableScanRDD[com.datastax.spark.connector.CassandraRow] = CassandraTableScanRDD[0] at RDD at CassandraRDD.scala:16

scala> test_spark_rdd.first
17/07/23 23:15:04 WARN TaskSetManager: Lost task 0.0 in stage 0.0 (TID 0, 172.16.214.41, executor 0): java.lang.NoClassDefFoundError: scala/runtime/AbstractPartialFunction$mcJL$sp
        at java.lang.ClassLoader.defineClass1(Native Method)
        at java.lang.ClassLoader.defineClass(ClassLoader.java:763)
        at java.security.SecureClassLoader.defineClass(SecureClassLoader.java:142)
        at java.net.URLClassLoader.defineClass(URLClassLoader.java:467)
        at java.net.URLClassLoader.access$100(URLClassLoader.java:73)
        at java.net.URLClassLoader$1.run(URLClassLoader.java:368)
        at java.net.URLClassLoader$1.run(URLClassLoader.java:362)
        at java.security.AccessController.doPrivileged(Native Method)
        at java.net.URLClassLoader.findClass(URLClassLoader.java:361)
        at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
        at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:335)
        at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
        at com.datastax.spark.connector.rdd.CassandraLimit$.limitForIterator(CassandraLimit.scala:21)
        at com.datastax.spark.connector.rdd.CassandraTableScanRDD.compute(CassandraTableScanRDD.scala:368)
        at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
        at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
        at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
        at org.apache.spark.scheduler.Task.run(Task.scala:108)
        at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:335)
        at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
        at java.lang.Thread.run(Thread.java:748)
Caused by: java.lang.ClassNotFoundException: scala.runtime.AbstractPartialFunction$mcJL$sp
        at java.net.URLClassLoader.findClass(URLClassLoader.java:381)
        at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
        at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:335)
        at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
        ... 22 more
17/07/23 23:15:04 WARN TaskSetManager: Lost task 0.1 in stage 0.0 (TID 1, 172.16.214.41, executor 0): java.lang.NoClassDefFoundError: com/datastax/spark/connector/rdd/CassandraLimit$$anonfun$limitForIterator$1
        at com.datastax.spark.connector.rdd.CassandraLimit$.limitForIterator(CassandraLimit.scala:21)
        at com.datastax.spark.connector.rdd.CassandraTableScanRDD.compute(CassandraTableScanRDD.scala:368)
        at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
        at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
        at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
        at org.apache.spark.scheduler.Task.run(Task.scala:108)
        at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:335)
        at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
        at java.lang.Thread.run(Thread.java:748)
17/07/23 23:15:04 ERROR TaskSetManager: Task 0 in stage 0.0 failed 4 times; aborting job
org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 0.0 failed 4 times, most recent failure: Lost task 0.3 in stage 0.0 (TID 3, 172.16.214.41, executor 0): java.lang.NoClassDefFoundError: com/datastax/spark/connector/rdd/CassandraLimit$$anonfun$limitForIterator$1
        at com.datastax.spark.connector.rdd.CassandraLimit$.limitForIterator(CassandraLimit.scala:21)
        at com.datastax.spark.connector.rdd.CassandraTableScanRDD.compute(CassandraTableScanRDD.scala:368)
        at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
        at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
        at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
        at org.apache.spark.scheduler.Task.run(Task.scala:108)
        at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:335)
        at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
        at java.lang.Thread.run(Thread.java:748)
Driver stacktrace:
  at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1499)
  at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1487)
  at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1486)
  at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
  at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
  at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1486)
  at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:814)
  at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:814)
  at scala.Option.foreach(Option.scala:257)
  at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:814)
  at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1714)
  at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1669)
  at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1658)
  at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
  at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:630)
  at org.apache.spark.SparkContext.runJob(SparkContext.scala:2022)
  at org.apache.spark.SparkContext.runJob(SparkContext.scala:2043)
  at org.apache.spark.SparkContext.runJob(SparkContext.scala:2062)
  at org.apache.spark.rdd.RDD$$anonfun$take$1.apply(RDD.scala:1354)
  at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
  at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
  at org.apache.spark.rdd.RDD.withScope(RDD.scala:362)
  at org.apache.spark.rdd.RDD.take(RDD.scala:1327)
  at com.datastax.spark.connector.rdd.CassandraRDD.take(CassandraRDD.scala:127)
  at com.datastax.spark.connector.rdd.CassandraRDD.take(CassandraRDD.scala:128)
  at org.apache.spark.rdd.RDD$$anonfun$first$1.apply(RDD.scala:1368)
  at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
  at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
  at org.apache.spark.rdd.RDD.withScope(RDD.scala:362)
  at org.apache.spark.rdd.RDD.first(RDD.scala:1367)
  ... 52 elided
Caused by: java.lang.NoClassDefFoundError: com/datastax/spark/connector/rdd/CassandraLimit$$anonfun$limitForIterator$1
  at com.datastax.spark.connector.rdd.CassandraLimit$.limitForIterator(CassandraLimit.scala:21)
  at com.datastax.spark.connector.rdd.CassandraTableScanRDD.compute(CassandraTableScanRDD.scala:368)
  at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
  at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
  at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
  at org.apache.spark.scheduler.Task.run(Task.scala:108)
  at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:335)
  at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
  at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
  at java.lang.Thread.run(Thread.java:748)

scala>
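
To save readers from digging through the log, this is, in essence, all the Scala I run in the shell (the host, keyspace, and table names are the ones from my session above):

// Recap of the exact steps from the spark-shell session above.
// 172.16.214.41 is my Cassandra host; test_spark.test is an existing keyspace/table.
import com.datastax.spark.connector._
import org.apache.spark.{SparkConf, SparkContext}

// After stopping the shell's default context (sc.stop), build a new one
// that points the Cassandra connector at the cluster.
val conf = new SparkConf(true)
  .set("spark.cassandra.connection.host", "172.16.214.41")
val sc = new SparkContext(conf)

// Fetching the first row is where the NoClassDefFoundError is thrown on the executor.
val test_spark_rdd = sc.cassandraTable("test_spark", "test")
test_spark_rdd.first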


I am following
https://www.datastax.com/dev/blog/kindling-an-introduction-to-spark-with-cassandra-part-1

Any help would be highly appreciated.

Regards,

Kaushal
