Chirag, this worked for us:
spark-submit --master yarn-cluster --driver-class-path '/opt/cloudera/parcels/CDH/lib/hive/lib/*' --driver-java-options '-Dspark.executor.extraClassPath=/opt/cloudera/parcels/CDH/lib/hive/lib/*' ... Let me know if you have any issues. On Fri, Feb 20, 2015 at 2:43 PM, chirag lakhani <chirag.lakh...@gmail.com> wrote: > I am trying to access a hive table using spark sql but I am having > trouble. I followed the instructions in a cloudera community board which > stated > > 1) Import hive jars into the class path > > export SPARK_CLASSPATH=$(find > /data/opt/cloudera/parcels/CDH-5.3.1-1.cdh5.3.1.p0.5/lib/hive/lib/ -name > '*.jar' -print0 | sed 's/\x0/:/g') > > 2) start the spark shell > > spark-shell > > 3) created a hive context > > val sqlContext = new org.apache.spark.sql.hive.HiveContext(sc) > > 4) then run query > > sqlContext.sql("FROM analytics.trainingdatafinal SELECT > *").collect().foreach(println) > > > When I do this it seems that it cannot find the table in the hive > metastore, I have put all of my cloudera parcels in the partition starting > with /data as opposed to the default location used by cloudera. Any > suggestions on what can be done? 
I am putting the error below > > > 15/02/20 13:43:01 ERROR Hive: > NoSuchObjectException(message:analytics.trainingdatafinal table not found) > at > org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.get_table(HiveMetaStore.java:1569) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:606) > at > org.apache.hadoop.hive.metastore.RetryingHMSHandler.invoke(RetryingHMSHandler.java:106) > at com.sun.proxy.$Proxy24.get_table(Unknown Source) > at > org.apache.hadoop.hive.metastore.HiveMetaStoreClient.getTable(HiveMetaStoreClient.java:1008) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:606) > at > org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.invoke(RetryingMetaStoreClient.java:90) > at com.sun.proxy.$Proxy25.getTable(Unknown Source) > at org.apache.hadoop.hive.ql.metadata.Hive.getTable(Hive.java:1000) > at org.apache.hadoop.hive.ql.metadata.Hive.getTable(Hive.java:974) > at > org.apache.spark.sql.hive.HiveMetastoreCatalog.lookupRelation(HiveMetastoreCatalog.scala:70) > at org.apache.spark.sql.hive.HiveContext$$anon$2.org > $apache$spark$sql$catalyst$analysis$OverrideCatalog$$super$lookupRelation(HiveContext.scala:253) > at > org.apache.spark.sql.catalyst.analysis.OverrideCatalog$$anonfun$lookupRelation$3.apply(Catalog.scala:141) > at > org.apache.spark.sql.catalyst.analysis.OverrideCatalog$$anonfun$lookupRelation$3.apply(Catalog.scala:141) > at scala.Option.getOrElse(Option.scala:120) > at > 
org.apache.spark.sql.catalyst.analysis.OverrideCatalog$class.lookupRelation(Catalog.scala:141) > at > org.apache.spark.sql.hive.HiveContext$$anon$2.lookupRelation(HiveContext.scala:253) > at > org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveRelations$$anonfun$apply$5.applyOrElse(Analyzer.scala:143) > at > org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveRelations$$anonfun$apply$5.applyOrElse(Analyzer.scala:138) > at > org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:144) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:162) > at scala.collection.Iterator$$anon$11.next(Iterator.scala:328) > at scala.collection.Iterator$class.foreach(Iterator.scala:727) > at scala.collection.AbstractIterator.foreach(Iterator.scala:1157) > at scala.collection.generic.Growable$class.$plus$plus$eq(Growable.scala:48) > at > scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:103) > at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:47) > at scala.collection.TraversableOnce$class.to(TraversableOnce.scala:273) > at scala.collection.AbstractIterator.to(Iterator.scala:1157) > at > scala.collection.TraversableOnce$class.toBuffer(TraversableOnce.scala:265) > at scala.collection.AbstractIterator.toBuffer(Iterator.scala:1157) > at > scala.collection.TraversableOnce$class.toArray(TraversableOnce.scala:252) > at scala.collection.AbstractIterator.toArray(Iterator.scala:1157) > at > org.apache.spark.sql.catalyst.trees.TreeNode.transformChildrenDown(TreeNode.scala:191) > at > org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:147) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:162) > at scala.collection.Iterator$$anon$11.next(Iterator.scala:328) > at scala.collection.Iterator$class.foreach(Iterator.scala:727) > at scala.collection.AbstractIterator.foreach(Iterator.scala:1157) > at 
scala.collection.generic.Growable$class.$plus$plus$eq(Growable.scala:48) > at > scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:103) > at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:47) > at scala.collection.TraversableOnce$class.to(TraversableOnce.scala:273) > at scala.collection.AbstractIterator.to(Iterator.scala:1157) > at > scala.collection.TraversableOnce$class.toBuffer(TraversableOnce.scala:265) > at scala.collection.AbstractIterator.toBuffer(Iterator.scala:1157) > at > scala.collection.TraversableOnce$class.toArray(TraversableOnce.scala:252) > at scala.collection.AbstractIterator.toArray(Iterator.scala:1157) > at > org.apache.spark.sql.catalyst.trees.TreeNode.transformChildrenDown(TreeNode.scala:191) > at > org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:147) > at > org.apache.spark.sql.catalyst.trees.TreeNode.transform(TreeNode.scala:135) > at > org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveRelations$.apply(Analyzer.scala:138) > at > org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveRelations$.apply(Analyzer.scala:137) > at > org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$apply$1$$anonfun$apply$2.apply(RuleExecutor.scala:61) > at > org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$apply$1$$anonfun$apply$2.apply(RuleExecutor.scala:59) > at > scala.collection.LinearSeqOptimized$class.foldLeft(LinearSeqOptimized.scala:111) > at scala.collection.immutable.List.foldLeft(List.scala:84) > at > org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$apply$1.apply(RuleExecutor.scala:59) > at > org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$apply$1.apply(RuleExecutor.scala:51) > at scala.collection.immutable.List.foreach(List.scala:318) > at > org.apache.spark.sql.catalyst.rules.RuleExecutor.apply(RuleExecutor.scala:51) > at > org.apache.spark.sql.SQLContext$QueryExecution.analyzed$lzycompute(SQLContext.scala:411) > at > 
org.apache.spark.sql.SQLContext$QueryExecution.analyzed(SQLContext.scala:411) > at > org.apache.spark.sql.SQLContext$QueryExecution.withCachedData$lzycompute(SQLContext.scala:412) > at > org.apache.spark.sql.SQLContext$QueryExecution.withCachedData(SQLContext.scala:412) > at > org.apache.spark.sql.SQLContext$QueryExecution.optimizedPlan$lzycompute(SQLContext.scala:413) > at > org.apache.spark.sql.SQLContext$QueryExecution.optimizedPlan(SQLContext.scala:413) > at > org.apache.spark.sql.SQLContext$QueryExecution.sparkPlan$lzycompute(SQLContext.scala:418) > at > org.apache.spark.sql.SQLContext$QueryExecution.sparkPlan(SQLContext.scala:416) > at > org.apache.spark.sql.SQLContext$QueryExecution.executedPlan$lzycompute(SQLContext.scala:422) > at > org.apache.spark.sql.SQLContext$QueryExecution.executedPlan(SQLContext.scala:422) > at org.apache.spark.sql.SchemaRDD.collect(SchemaRDD.scala:444) > at org.apache.spark.sql.SchemaRDD.take(SchemaRDD.scala:446) > at $line9.$read$$iwC$$iwC$$iwC$$iwC.<init>(<console>:15) > at $line9.$read$$iwC$$iwC$$iwC.<init>(<console>:20) > at $line9.$read$$iwC$$iwC.<init>(<console>:22) > at $line9.$read$$iwC.<init>(<console>:24) > at $line9.$read.<init>(<console>:26) > at $line9.$read$.<init>(<console>:30) > at $line9.$read$.<clinit>(<console>) > at $line9.$eval$.<init>(<console>:7) > at $line9.$eval$.<clinit>(<console>) > at $line9.$eval.$print(<console>) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:606) > at > org.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852) > at > org.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125) > at org.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674) > at 
org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705) > at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669) > at org.apache.spark.repl.SparkILoop.reallyInterpret$1(SparkILoop.scala:828) > at > org.apache.spark.repl.SparkILoop.interpretStartingWith(SparkILoop.scala:873) > at org.apache.spark.repl.SparkILoop.command(SparkILoop.scala:785) > at org.apache.spark.repl.SparkILoop.processLine$1(SparkILoop.scala:628) > at org.apache.spark.repl.SparkILoop.innerLoop$1(SparkILoop.scala:636) > at org.apache.spark.repl.SparkILoop.loop(SparkILoop.scala:641) > at > org.apache.spark.repl.SparkILoop$$anonfun$process$1.apply$mcZ$sp(SparkILoop.scala:968) > at > org.apache.spark.repl.SparkILoop$$anonfun$process$1.apply(SparkILoop.scala:916) > at > org.apache.spark.repl.SparkILoop$$anonfun$process$1.apply(SparkILoop.scala:916) > at > scala.tools.nsc.util.ScalaClassLoader$.savingContextLoader(ScalaClassLoader.scala:135) > at org.apache.spark.repl.SparkILoop.process(SparkILoop.scala:916) > at org.apache.spark.repl.SparkILoop.process(SparkILoop.scala:1011) > at org.apache.spark.repl.Main$.main(Main.scala:31) > at org.apache.spark.repl.Main.main(Main.scala) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:606) > at org.apache.spark.deploy.SparkSubmit$.launch(SparkSubmit.scala:358) > at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:75) > at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala) > > org.apache.hadoop.hive.ql.metadata.InvalidTableException: Table not found > trainingdatafinal > at org.apache.hadoop.hive.ql.metadata.Hive.getTable(Hive.java:1004) > at org.apache.hadoop.hive.ql.metadata.Hive.getTable(Hive.java:974) > at > 
org.apache.spark.sql.hive.HiveMetastoreCatalog.lookupRelation(HiveMetastoreCatalog.scala:70) > at org.apache.spark.sql.hive.HiveContext$$anon$2.org > $apache$spark$sql$catalyst$analysis$OverrideCatalog$$super$lookupRelation(HiveContext.scala:253) > at > org.apache.spark.sql.catalyst.analysis.OverrideCatalog$$anonfun$lookupRelation$3.apply(Catalog.scala:141) > at > org.apache.spark.sql.catalyst.analysis.OverrideCatalog$$anonfun$lookupRelation$3.apply(Catalog.scala:141) > at scala.Option.getOrElse(Option.scala:120) > at > org.apache.spark.sql.catalyst.analysis.OverrideCatalog$class.lookupRelation(Catalog.scala:141) > at > org.apache.spark.sql.hive.HiveContext$$anon$2.lookupRelation(HiveContext.scala:253) > at > org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveRelations$$anonfun$apply$5.applyOrElse(Analyzer.scala:143) > at > org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveRelations$$anonfun$apply$5.applyOrElse(Analyzer.scala:138) > at > org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:144) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:162) > at scala.collection.Iterator$$anon$11.next(Iterator.scala:328) > at scala.collection.Iterator$class.foreach(Iterator.scala:727) > at scala.collection.AbstractIterator.foreach(Iterator.scala:1157) > at scala.collection.generic.Growable$class.$plus$plus$eq(Growable.scala:48) > at > scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:103) > at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:47) > at scala.collection.TraversableOnce$class.to(TraversableOnce.scala:273) > at scala.collection.AbstractIterator.to(Iterator.scala:1157) > at > scala.collection.TraversableOnce$class.toBuffer(TraversableOnce.scala:265) > at scala.collection.AbstractIterator.toBuffer(Iterator.scala:1157) > at > scala.collection.TraversableOnce$class.toArray(TraversableOnce.scala:252) > at 
scala.collection.AbstractIterator.toArray(Iterator.scala:1157) > at > org.apache.spark.sql.catalyst.trees.TreeNode.transformChildrenDown(TreeNode.scala:191) > at > org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:147) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:162) > at scala.collection.Iterator$$anon$11.next(Iterator.scala:328) > at scala.collection.Iterator$class.foreach(Iterator.scala:727) > at scala.collection.AbstractIterator.foreach(Iterator.scala:1157) > at scala.collection.generic.Growable$class.$plus$plus$eq(Growable.scala:48) > at > scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:103) > at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:47) > at scala.collection.TraversableOnce$class.to(TraversableOnce.scala:273) > at scala.collection.AbstractIterator.to(Iterator.scala:1157) > at > scala.collection.TraversableOnce$class.toBuffer(TraversableOnce.scala:265) > at scala.collection.AbstractIterator.toBuffer(Iterator.scala:1157) > at > scala.collection.TraversableOnce$class.toArray(TraversableOnce.scala:252) > at scala.collection.AbstractIterator.toArray(Iterator.scala:1157) > at > org.apache.spark.sql.catalyst.trees.TreeNode.transformChildrenDown(TreeNode.scala:191) > at > org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:147) > at > org.apache.spark.sql.catalyst.trees.TreeNode.transform(TreeNode.scala:135) > at > org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveRelations$.apply(Analyzer.scala:138) > at > org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveRelations$.apply(Analyzer.scala:137) > at > org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$apply$1$$anonfun$apply$2.apply(RuleExecutor.scala:61) > at > org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$apply$1$$anonfun$apply$2.apply(RuleExecutor.scala:59) > at > scala.collection.LinearSeqOptimized$class.foldLeft(LinearSeqOptimized.scala:111) > at 
scala.collection.immutable.List.foldLeft(List.scala:84) > at > org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$apply$1.apply(RuleExecutor.scala:59) > at > org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$apply$1.apply(RuleExecutor.scala:51) > at scala.collection.immutable.List.foreach(List.scala:318) > at > org.apache.spark.sql.catalyst.rules.RuleExecutor.apply(RuleExecutor.scala:51) > at > org.apache.spark.sql.SQLContext$QueryExecution.analyzed$lzycompute(SQLContext.scala:411) > at > org.apache.spark.sql.SQLContext$QueryExecution.analyzed(SQLContext.scala:411) > at > org.apache.spark.sql.SQLContext$QueryExecution.withCachedData$lzycompute(SQLContext.scala:412) > at > org.apache.spark.sql.SQLContext$QueryExecution.withCachedData(SQLContext.scala:412) > at > org.apache.spark.sql.SQLContext$QueryExecution.optimizedPlan$lzycompute(SQLContext.scala:413) > at > org.apache.spark.sql.SQLContext$QueryExecution.optimizedPlan(SQLContext.scala:413) > at > org.apache.spark.sql.SQLContext$QueryExecution.sparkPlan$lzycompute(SQLContext.scala:418) > at > org.apache.spark.sql.SQLContext$QueryExecution.sparkPlan(SQLContext.scala:416) > at > org.apache.spark.sql.SQLContext$QueryExecution.executedPlan$lzycompute(SQLContext.scala:422) > at > org.apache.spark.sql.SQLContext$QueryExecution.executedPlan(SQLContext.scala:422) > at org.apache.spark.sql.SchemaRDD.collect(SchemaRDD.scala:444) > at org.apache.spark.sql.SchemaRDD.take(SchemaRDD.scala:446) > at $iwC$$iwC$$iwC$$iwC.<init>(<console>:15) > at $iwC$$iwC$$iwC.<init>(<console>:20) > at $iwC$$iwC.<init>(<console>:22) > at $iwC.<init>(<console>:24) > at <init>(<console>:26) > at .<init>(<console>:30) > at .<clinit>(<console>) > at .<init>(<console>:7) > at .<clinit>(<console>) > at $print(<console>) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57) > at > 
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:606) > at > org.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:852) > at > org.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1125) > at org.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:674) > at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:705) > at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:669) > at org.apache.spark.repl.SparkILoop.reallyInterpret$1(SparkILoop.scala:828) > at > org.apache.spark.repl.SparkILoop.interpretStartingWith(SparkILoop.scala:873) > at org.apache.spark.repl.SparkILoop.command(SparkILoop.scala:785) > at org.apache.spark.repl.SparkILoop.processLine$1(SparkILoop.scala:628) > at org.apache.spark.repl.SparkILoop.innerLoop$1(SparkILoop.scala:636) > at org.apache.spark.repl.SparkILoop.loop(SparkILoop.scala:641) > at > org.apache.spark.repl.SparkILoop$$anonfun$process$1.apply$mcZ$sp(SparkILoop.scala:968) > at > org.apache.spark.repl.SparkILoop$$anonfun$process$1.apply(SparkILoop.scala:916) > at > org.apache.spark.repl.SparkILoop$$anonfun$process$1.apply(SparkILoop.scala:916) > at > scala.tools.nsc.util.ScalaClassLoader$.savingContextLoader(ScalaClassLoader.scala:135) > at org.apache.spark.repl.SparkILoop.process(SparkILoop.scala:916) > at org.apache.spark.repl.SparkILoop.process(SparkILoop.scala:1011) > at org.apache.spark.repl.Main$.main(Main.scala:31) > at org.apache.spark.repl.Main.main(Main.scala) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:606) > at org.apache.spark.deploy.SparkSubmit$.launch(SparkSubmit.scala:358) > at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:75) 
> at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala) > > > -- *Gna Phetsarath* C: +1 917.373.7363 AIM: sphetsarath20 VVMR: 8890237 Address | 54 West 40th Street, New York, NY 10018