Hello Spark Community,

I am reading HBase table from Spark and getting RDD but now i wants to
convert RDD of Spark Rows and want to convert to DF.

*Source Code:*

bin/spark-shell --packages
it.nerdammer.bigdata:spark-hbase-connector_2.10:1.0.3 --conf
spark.hbase.host=127.0.0.1

import it.nerdammer.spark.hbase._
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.Row
import org.apache.spark.sql.types.StructType
import org.apache.spark.sql.types.StructField
import org.apache.spark.sql.types.StringType

val sparkConf = new SparkConf().setAppName("HBase Spark POC")

val sparkContext = new SparkContext(sparkConf)

val sqlContext = new org.apache.spark.sql.SQLContext(sparkContext)

val hBaseRDD = sc.hbaseTable[(Option[String], Option[Int], Option[Int],
Option[Int], Option[Int], Option[Int])]("university").select("maths",
"english","science","history","computer").inColumnFamily("school")

val rowRDD = hBaseRDD.map(i =>
Row(i._1.get,i._2.get,i._3.get,i._4.get,i._5.get,i._6.get))

val stdSchemaString= "Rowid,maths,english,science,history,computer"

val stdSchema= StructType(stdSchemaString.split(",").map(fieldName =>
StructField(fieldName, StringType, true)))

val stdDf = sqlContext.createDataFrame(rowRDD,stdSchema);

// Getting Error

stdDf.registerTempTable("student")

sqlContext.sql("select * from student").show()

*Error*

scala> val stdDf = sqlContext.createDataFrame(rowRDD,stdSchema);
16/12/28 20:50:59 ERROR metastore.RetryingHMSHandler:
AlreadyExistsException(message:Database default already exists)
at
org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.create_database(HiveMetaStore.java:891)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at
org.apache.hadoop.hive.metastore.RetryingHMSHandler.invoke(RetryingHMSHandler.java:107)
at com.sun.proxy.$Proxy21.create_database(Unknown Source)
at
org.apache.hadoop.hive.metastore.HiveMetaStoreClient.createDatabase(HiveMetaStoreClient.java:644)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at
org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.invoke(RetryingMetaStoreClient.java:156)
at com.sun.proxy.$Proxy22.createDatabase(Unknown Source)
at org.apache.hadoop.hive.ql.metadata.Hive.createDatabase(Hive.java:306)
at
org.apache.spark.sql.hive.client.HiveClientImpl$$anonfun$createDatabase$1.apply$mcV$sp(HiveClientImpl.scala:309)
at
org.apache.spark.sql.hive.client.HiveClientImpl$$anonfun$createDatabase$1.apply(HiveClientImpl.scala:309)
at
org.apache.spark.sql.hive.client.HiveClientImpl$$anonfun$createDatabase$1.apply(HiveClientImpl.scala:309)
at
org.apache.spark.sql.hive.client.HiveClientImpl$$anonfun$withHiveState$1.apply(HiveClientImpl.scala:280)
at
org.apache.spark.sql.hive.client.HiveClientImpl.liftedTree1$1(HiveClientImpl.scala:227)
at
org.apache.spark.sql.hive.client.HiveClientImpl.retryLocked(HiveClientImpl.scala:226)
at
org.apache.spark.sql.hive.client.HiveClientImpl.withHiveState(HiveClientImpl.scala:269)
at
org.apache.spark.sql.hive.client.HiveClientImpl.createDatabase(HiveClientImpl.scala:308)
at
org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$createDatabase$1.apply$mcV$sp(HiveExternalCatalog.scala:99)
at
org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$createDatabase$1.apply(HiveExternalCatalog.scala:99)
at
org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$createDatabase$1.apply(HiveExternalCatalog.scala:99)
at
org.apache.spark.sql.hive.HiveExternalCatalog.withClient(HiveExternalCatalog.scala:72)
at
org.apache.spark.sql.hive.HiveExternalCatalog.createDatabase(HiveExternalCatalog.scala:98)
at
org.apache.spark.sql.catalyst.catalog.SessionCatalog.createDatabase(SessionCatalog.scala:147)
at
org.apache.spark.sql.catalyst.catalog.SessionCatalog.<init>(SessionCatalog.scala:89)
at
org.apache.spark.sql.hive.HiveSessionCatalog.<init>(HiveSessionCatalog.scala:51)
at
org.apache.spark.sql.hive.HiveSessionState.catalog$lzycompute(HiveSessionState.scala:49)
at
org.apache.spark.sql.hive.HiveSessionState.catalog(HiveSessionState.scala:48)
at
org.apache.spark.sql.hive.HiveSessionState$$anon$1.<init>(HiveSessionState.scala:63)
at
org.apache.spark.sql.hive.HiveSessionState.analyzer$lzycompute(HiveSessionState.scala:63)
at
org.apache.spark.sql.hive.HiveSessionState.analyzer(HiveSessionState.scala:62)
at
org.apache.spark.sql.execution.QueryExecution.assertAnalyzed(QueryExecution.scala:49)
at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:64)
at org.apache.spark.sql.SparkSession.createDataFrame(SparkSession.scala:542)
at org.apache.spark.sql.SparkSession.createDataFrame(SparkSession.scala:302)
at org.apache.spark.sql.SQLContext.createDataFrame(SQLContext.scala:337)
at
$line34.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:42)
at $line34.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:47)
at $line34.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:49)
at $line34.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:51)
at $line34.$read$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:53)
at $line34.$read$$iw$$iw$$iw$$iw$$iw.<init>(<console>:55)
at $line34.$read$$iw$$iw$$iw$$iw.<init>(<console>:57)
at $line34.$read$$iw$$iw$$iw.<init>(<console>:59)
at $line34.$read$$iw$$iw.<init>(<console>:61)
at $line34.$read$$iw.<init>(<console>:63)
at $line34.$read.<init>(<console>:65)
at $line34.$read$.<init>(<console>:69)
at $line34.$read$.<clinit>(<console>)
at $line34.$eval$.$print$lzycompute(<console>:7)
at $line34.$eval$.$print(<console>:6)
at $line34.$eval.$print(<console>)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at scala.tools.nsc.interpreter.IMain$ReadEvalPrint.call(IMain.scala:786)
at scala.tools.nsc.interpreter.IMain$Request.loadAndRun(IMain.scala:1047)
at
scala.tools.nsc.interpreter.IMain$WrappedRequest$$anonfun$loadAndRunReq$1.apply(IMain.scala:638)
at
scala.tools.nsc.interpreter.IMain$WrappedRequest$$anonfun$loadAndRunReq$1.apply(IMain.scala:637)
at
scala.reflect.internal.util.ScalaClassLoader$class.asContext(ScalaClassLoader.scala:31)
at
scala.reflect.internal.util.AbstractFileClassLoader.asContext(AbstractFileClassLoader.scala:19)
at
scala.tools.nsc.interpreter.IMain$WrappedRequest.loadAndRunReq(IMain.scala:637)
at scala.tools.nsc.interpreter.IMain.interpret(IMain.scala:569)
at scala.tools.nsc.interpreter.IMain.interpret(IMain.scala:565)
at scala.tools.nsc.interpreter.ILoop.interpretStartingWith(ILoop.scala:807)
at scala.tools.nsc.interpreter.ILoop.command(ILoop.scala:681)
at scala.tools.nsc.interpreter.ILoop.processLine(ILoop.scala:395)
at scala.tools.nsc.interpreter.ILoop.loop(ILoop.scala:415)
at
scala.tools.nsc.interpreter.ILoop$$anonfun$process$1.apply$mcZ$sp(ILoop.scala:923)
at
scala.tools.nsc.interpreter.ILoop$$anonfun$process$1.apply(ILoop.scala:909)
at
scala.tools.nsc.interpreter.ILoop$$anonfun$process$1.apply(ILoop.scala:909)
at
scala.reflect.internal.util.ScalaClassLoader$.savingContextLoader(ScalaClassLoader.scala:97)
at scala.tools.nsc.interpreter.ILoop.process(ILoop.scala:909)
at org.apache.spark.repl.Main$.doMain(Main.scala:68)
at org.apache.spark.repl.Main$.main(Main.scala:51)
at org.apache.spark.repl.Main.main(Main.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at
org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:736)
at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:185)
at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:210)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:124)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)

stdDf: org.apache.spark.sql.DataFrame = [Rowid: string, maths: string ... 4
more fields]

What would be resolution ?

Thanks,
Chetan

Reply via email to