Hello Spark folks, Another odd experience I have had with Spark's SqlContext: when I create a DataFrame, this error is sometimes thrown and sometimes not!
scala> import sqlContext.implicits._ import sqlContext.implicits._ scala> val stdDf = sqlContext.createDataFrame(rowRDD,empSchema.struct); 17/01/17 10:27:15 ERROR metastore.RetryingHMSHandler: AlreadyExistsException(message:Database default already exists) at org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.create_database(HiveMetaStore.java:891) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:498) at org.apache.hadoop.hive.metastore.RetryingHMSHandler.invoke(RetryingHMSHandler.java:107) at com.sun.proxy.$Proxy21.create_database(Unknown Source) at org.apache.hadoop.hive.metastore.HiveMetaStoreClient.createDatabase(HiveMetaStoreClient.java:644) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:498) at org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.invoke(RetryingMetaStoreClient.java:156) at com.sun.proxy.$Proxy22.createDatabase(Unknown Source) at org.apache.hadoop.hive.ql.metadata.Hive.createDatabase(Hive.java:306) at org.apache.spark.sql.hive.client.HiveClientImpl$$anonfun$createDatabase$1.apply$mcV$sp(HiveClientImpl.scala:309) at org.apache.spark.sql.hive.client.HiveClientImpl$$anonfun$createDatabase$1.apply(HiveClientImpl.scala:309) at org.apache.spark.sql.hive.client.HiveClientImpl$$anonfun$createDatabase$1.apply(HiveClientImpl.scala:309) at org.apache.spark.sql.hive.client.HiveClientImpl$$anonfun$withHiveState$1.apply(HiveClientImpl.scala:280) at org.apache.spark.sql.hive.client.HiveClientImpl.liftedTree1$1(HiveClientImpl.scala:227) at 
org.apache.spark.sql.hive.client.HiveClientImpl.retryLocked(HiveClientImpl.scala:226) at org.apache.spark.sql.hive.client.HiveClientImpl.withHiveState(HiveClientImpl.scala:269) at org.apache.spark.sql.hive.client.HiveClientImpl.createDatabase(HiveClientImpl.scala:308) at org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$createDatabase$1.apply$mcV$sp(HiveExternalCatalog.scala:99) at org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$createDatabase$1.apply(HiveExternalCatalog.scala:99) at org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$createDatabase$1.apply(HiveExternalCatalog.scala:99) at org.apache.spark.sql.hive.HiveExternalCatalog.withClient(HiveExternalCatalog.scala:72) at org.apache.spark.sql.hive.HiveExternalCatalog.createDatabase(HiveExternalCatalog.scala:98) at org.apache.spark.sql.catalyst.catalog.SessionCatalog.createDatabase(SessionCatalog.scala:147) at org.apache.spark.sql.catalyst.catalog.SessionCatalog.<init>(SessionCatalog.scala:89) at org.apache.spark.sql.hive.HiveSessionCatalog.<init>(HiveSessionCatalog.scala:51) at org.apache.spark.sql.hive.HiveSessionState.catalog$lzycompute(HiveSessionState.scala:49) at org.apache.spark.sql.hive.HiveSessionState.catalog(HiveSessionState.scala:48) at org.apache.spark.sql.hive.HiveSessionState$$anon$1.<init>(HiveSessionState.scala:63) at org.apache.spark.sql.hive.HiveSessionState.analyzer$lzycompute(HiveSessionState.scala:63) at org.apache.spark.sql.hive.HiveSessionState.analyzer(HiveSessionState.scala:62) at org.apache.spark.sql.execution.QueryExecution.assertAnalyzed(QueryExecution.scala:49) at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:64) at org.apache.spark.sql.SparkSession.createDataFrame(SparkSession.scala:542) at org.apache.spark.sql.SparkSession.createDataFrame(SparkSession.scala:302) at org.apache.spark.sql.SQLContext.createDataFrame(SQLContext.scala:337) at $line28.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:43) at 
$line28.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:48) at $line28.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:50) at $line28.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:52) at $line28.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:54) at $line28.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:56) at $line28.$read$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:58) at $line28.$read$$iw$$iw$$iw$$iw$$iw.<init>(<console>:60) at $line28.$read$$iw$$iw$$iw$$iw.<init>(<console>:62) at $line28.$read$$iw$$iw$$iw.<init>(<console>:64) at $line28.$read$$iw$$iw.<init>(<console>:66) at $line28.$read$$iw.<init>(<console>:68) at $line28.$read.<init>(<console>:70) at $line28.$read$.<init>(<console>:74) at $line28.$read$.<clinit>(<console>) at $line28.$eval$.$print$lzycompute(<console>:7) at $line28.$eval$.$print(<console>:6) at $line28.$eval.$print(<console>) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:498) at scala.tools.nsc.interpreter.IMain$ReadEvalPrint.call(IMain.scala:786) at scala.tools.nsc.interpreter.IMain$Request.loadAndRun(IMain.scala:1047) at scala.tools.nsc.interpreter.IMain$WrappedRequest$$anonfun$loadAndRunReq$1.apply(IMain.scala:638) at scala.tools.nsc.interpreter.IMain$WrappedRequest$$anonfun$loadAndRunReq$1.apply(IMain.scala:637) at scala.reflect.internal.util.ScalaClassLoader$class.asContext(ScalaClassLoader.scala:31) at scala.reflect.internal.util.AbstractFileClassLoader.asContext(AbstractFileClassLoader.scala:19) at scala.tools.nsc.interpreter.IMain$WrappedRequest.loadAndRunReq(IMain.scala:637) at scala.tools.nsc.interpreter.IMain.interpret(IMain.scala:569) at scala.tools.nsc.interpreter.IMain.interpret(IMain.scala:565) at 
scala.tools.nsc.interpreter.ILoop.interpretStartingWith(ILoop.scala:807) at scala.tools.nsc.interpreter.ILoop.command(ILoop.scala:681) at scala.tools.nsc.interpreter.ILoop.processLine(ILoop.scala:395) at scala.tools.nsc.interpreter.ILoop.loop(ILoop.scala:415) at scala.tools.nsc.interpreter.ILoop$$anonfun$process$1.apply$mcZ$sp(ILoop.scala:923) at scala.tools.nsc.interpreter.ILoop$$anonfun$process$1.apply(ILoop.scala:909) at scala.tools.nsc.interpreter.ILoop$$anonfun$process$1.apply(ILoop.scala:909) at scala.reflect.internal.util.ScalaClassLoader$.savingContextLoader(ScalaClassLoader.scala:97) at scala.tools.nsc.interpreter.ILoop.process(ILoop.scala:909) at org.apache.spark.repl.Main$.doMain(Main.scala:68) at org.apache.spark.repl.Main$.main(Main.scala:51) at org.apache.spark.repl.Main.main(Main.scala) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:498) at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:736) at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:185) at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:210) at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:124) at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala) stdDf: org.apache.spark.sql.DataFrame = [stid: string, name: string ... 3 more fields] again same works without exception: scala> import sqlContext.implicits._ import sqlContext.implicits._ scala> val stdDf = sqlContext.createDataFrame(rowRDD,empSchema.struct); stdDf: org.apache.spark.sql.DataFrame = [stid: string, name: string ... 3 more fields] Thanks. 
On Tue, Jan 17, 2017 at 12:48 AM, Chetan Khatri <chetan.opensou...@gmail.com > wrote: > Hello Community, > > I am struggling to save Dataframe to Hive Table, > > Versions: > > Hive 1.2.1 > Spark 2.0.1 > > *Working code:* > > /* > @Author: Chetan Khatri > /* @Author: Chetan Khatri Description: This Scala script has written for > HBase to Hive module, which reads table from HBase and dump it out to Hive > */ import it.nerdammer.spark.hbase._ import org.apache.spark.sql.Row import > org.apache.spark.sql.types.StructType import > org.apache.spark.sql.types.StructField > import org.apache.spark.sql.types.StringType import > org.apache.spark.sql.SparkSession > // Approach 1: // Read HBase Table val hBaseRDD = > sc.hbaseTable[(Option[String], Option[String], Option[String], > Option[String], Option[String])]("university").select("stid", > "name","subject","grade","city").inColumnFamily("emp") // Iterate > HBaseRDD and generate RDD[Row] val rowRDD = hBaseRDD.map(i => > Row(i._1.get,i._2.get,i._3.get,i._4.get,i._5.get)) // Create sqlContext > for createDataFrame method val sqlContext = new > org.apache.spark.sql.SQLContext(sc) > // Create Schema Structure object empSchema { val stid = > StructField("stid", StringType) val name = StructField("name", StringType) > val subject = StructField("subject", StringType) val grade = > StructField("grade", StringType) val city = StructField("city", StringType) > val struct = StructType(Array(stid, name, subject, grade, city)) } import > sqlContext.implicits._ // Create DataFrame with rowRDD and Schema structure > val stdDf = sqlContext.createDataFrame(rowRDD,empSchema.struct); // > Importing Hive import org.apache.spark.sql.hive // Enable Hive with Hive > warehouse in SparkSession val spark = SparkSession.builder().appName("Spark > Hive Example").config("spark.sql.warehouse.dir", > "/usr/local/hive/warehouse/").enableHiveSupport().getOrCreate() // Saving > Dataframe to Hive Table Successfully. 
> stdDf.write.mode("append").saveAsTable("employee") > // Approach 2 : Where error comes import spark.implicits._ import spark.sql > sql("use default") sql("create table employee(stid STRING, name STRING, > subject STRING, grade STRING, city STRING)") scala> sql("show > TABLES").show() +---------+-----------+ |tableName|isTemporary| > +---------+-----------+ | employee| false| +---------+-----------+ > stdDf.write.mode("append").saveAsTable("employee") ERROR Exception: > org.apache.spark.sql.AnalysisException: Saving data in MetastoreRelation > default, employee is not supported.; at org.apache.spark.sql. > execution.command.CreateDataSourceTableAsSelectCommand.run( > createDataSourceTables.scala:221) at org.apache.spark.sql. > execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:58) > at org.apache.spark.sql.execution.command.ExecutedCommandExec. > sideEffectResult(commands.scala:56) at org.apache.spark.sql. > execution.command.ExecutedCommandExec.doExecute(commands.scala:74) at > org.apache.spark.sql.execution.SparkPlan$$anonfun$ > execute$1.apply(SparkPlan.scala:115) at org.apache.spark.sql. > execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:115) at > org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:136) > at > org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) > at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:133) > at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:114) > at > org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:86) > at > org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:86) > at org.apache.spark.sql.DataFrameWriter.saveAsTable(DataFrameWriter.scala:378) > at org.apache.spark.sql.DataFrameWriter.saveAsTable(DataFrameWriter.scala:354) > ... 
56 elided Questions: In Approach 1, data is stored even though the Hive table was > not previously created: when I call saveAsTable it automatically creates the table for > me, and on subsequent runs it appends data to it. How can I store data in a > previously created table? > It also gives the warning WARN metastore.HiveMetaStore: Location: > file:/usr/local/spark/spark-warehouse/employee specified for non-external > table:employee — but I have already provided the path to the Hive metastore, so why > is it storing data in Spark's own warehouse metastore directory? > > The Hive setup was done following: http://mitu.co.in/wp- > content/uploads/2015/12/Hive-Installation-on-Ubuntu-14.04- > and-Hadoop-2.6.3.pdf and it is working well. I cannot change the Hive > version; it must be 1.2.1. > > Thank you. > > > > >