Deepak Kumar V created SPARK-6622:
-------------------------------------

             Summary: Spark SQL cannot communicate with Hive meta store
                 Key: SPARK-6622
                 URL: https://issues.apache.org/jira/browse/SPARK-6622
             Project: Spark
          Issue Type: Bug
          Components: Spark Submit
    Affects Versions: 1.3.0
            Reporter: Deepak Kumar V

I have multiple tables (among them dw_bid) that were created through Apache Hive. I have Avro data on HDFS that I want to join with the dw_bid table, and this join needs to be done using Spark SQL. Spark SQL is unable to communicate with the Apache Hive metastore and fails with the exception:

org.datanucleus.exceptions.NucleusDataStoreException: Unable to open a test connection to the given database. JDBC url = jdbc:mysql://hostname.vip.company.com:3306/HDB, username = hiveuser. Terminating connection pool (set lazyInit to true if you expect to start your database after your app).
Original Exception: ------
java.sql.SQLException: No suitable driver found for jdbc:mysql://hostname.vip.company.com:3306/HDB
        at java.sql.DriverManager.getConnection(DriverManager.java:596)

Spark Submit Command:

./bin/spark-submit -v \
  --master yarn-cluster \
  --driver-class-path /apache/hadoop/share/hadoop/common/hadoop-common-2.4.1-EBAY-2.jar:/apache/hadoop-2.4.1-2.1.3.0-2-EBAY/share/hadoop/yarn/lib/guava-11.0.2.jar \
  --jars /apache/hadoop/lib/hadoop-lzo-0.6.0.jar,/home/dvasthimal/spark1.3/mysql-connector-java-5.1.35-bin.jar,/home/dvasthimal/spark1.3/spark-avro_2.10-1.0.0.jar,/home/dvasthimal/spark1.3/spark-1.3.0-bin-hadoop2.4/lib/datanucleus-api-jdo-3.2.6.jar,/home/dvasthimal/spark1.3/spark-1.3.0-bin-hadoop2.4/lib/datanucleus-core-3.2.10.jar,/home/dvasthimal/spark1.3/spark-1.3.0-bin-hadoop2.4/lib/datanucleus-rdbms-3.2.9.jar,$SPARK_HOME/conf/hive-site.xml \
  --num-executors 1 \
  --driver-memory 4g \
  --driver-java-options "-XX:MaxPermSize=2G" \
  --executor-memory 2g \
  --executor-cores 1 \
  --queue hdmi-express \
  --class com.ebay.ep.poc.spark.reporting.SparkApp \
  spark_reporting-1.0-SNAPSHOT.jar \
  startDate=2015-02-16 endDate=2015-02-16 input=/user/dvasthimal/epdatasets/successdetail1/part-r-00000.avro subcommand=successevents2 output=/user/dvasthimal/epdatasets/successdetail2

MySQL Java Connector versions tried:
- mysql-connector-java-5.0.8-bin.jar (picked from the Apache Hive installation lib folder)
- mysql-connector-java-5.1.34.jar
- mysql-connector-java-5.1.35.jar

Spark version: 1.3.0, prebuilt for Hadoop 2.4.x (http://d3kbcqa49mib13.cloudfront.net/spark-1.3.0-bin-hadoop2.4.tgz)

$ hive --version
Hive 0.13.0.2.1.3.6-2
Subversion git://ip-10-0-0-90.ec2.internal/grid/0/jenkins/workspace/BIGTOP-HDP_RPM_REPO-HDP-2.1.3.6-centos6/bigtop/build/hive/rpm/BUILD/hive-0.13.0.2.1.3.6 -r 87da9430050fb9cc429d79d95626d26ea382b96c
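Whichever connector version is used, the inner error is the generic failure java.sql.DriverManager raises when no JDBC driver visible to the calling classloader accepts the URL. A standalone check along the following lines can confirm whether the MySQL driver is loadable from a given classpath (an illustrative sketch, not part of the original report: the object name MetastoreJdbcCheck is made up, the URL and username are copied from the stack trace above, and the password is a hypothetical placeholder):

import java.sql.DriverManager

object MetastoreJdbcCheck {
  def main(args: Array[String]): Unit = {
    // Throws ClassNotFoundException when mysql-connector-java is not on this
    // JVM's classpath -- the same condition under which DriverManager reports
    // "No suitable driver found" to DataNucleus.
    Class.forName("com.mysql.jdbc.Driver")

    val url = "jdbc:mysql://hostname.vip.company.com:3306/HDB"
    // "secret" is a placeholder; substitute the real metastore password.
    val conn = DriverManager.getConnection(url, "hiveuser", "secret")
    println("Connected to MySQL " + conn.getMetaData.getDatabaseProductVersion)
    conn.close()
  }
}

Running this once with only the jars passed via --jars on the classpath, and once with the connector jar appended to the driver classpath, shows whether the driver is reaching the driver-side JVM at all.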
Code:

package com.ebay.ep.poc.spark.reporting.process.service

import com.ebay.ep.poc.spark.reporting.process.util.DateUtil._
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.SparkContext._

import collection.mutable.HashMap
import com.databricks.spark.avro._

class HadoopSuccessEvents2Service extends ReportingService {

  override def execute(arguments: HashMap[String, String], sc: SparkContext) {
    val detail = "reporting.detail." + arguments.get("subcommand").get
    val startDate = arguments.get("startDate").get
    val endDate = arguments.get("endDate").get
    val input = arguments.get("input").get
    val output = arguments.get("output").get

    val sqlContext = new org.apache.spark.sql.hive.HiveContext(sc)

    // Load the Avro input and register it as a temporary table.
    val successDetail_S1 = sqlContext.avroFile(input)
    successDetail_S1.registerTempTable("sojsuccessevents1")

    println("show tables")
    sqlContext.sql("show tables")
    println("show tables")

    // Create the Avro-backed Hive target table.
    sqlContext.sql("""CREATE TABLE `sojsuccessevents2_spark`(
      `guid` string COMMENT 'from deserializer', `sessionkey` bigint COMMENT 'from deserializer',
      `sessionstartdate` string COMMENT 'from deserializer', `sojdatadate` string COMMENT 'from deserializer',
      `seqnum` int COMMENT 'from deserializer', `eventtimestamp` string COMMENT 'from deserializer',
      `siteid` int COMMENT 'from deserializer', `successeventtype` string COMMENT 'from deserializer',
      `sourcetype` string COMMENT 'from deserializer', `itemid` bigint COMMENT 'from deserializer',
      `shopcartid` bigint COMMENT 'from deserializer', `transactionid` bigint COMMENT 'from deserializer',
      `offerid` bigint COMMENT 'from deserializer', `userid` bigint COMMENT 'from deserializer',
      `priorpage1seqnum` int COMMENT 'from deserializer', `priorpage1pageid` int COMMENT 'from deserializer',
      `exclwmsearchattemptseqnum` int COMMENT 'from deserializer', `exclpriorsearchpageid` int COMMENT 'from deserializer',
      `exclpriorsearchseqnum` int COMMENT 'from deserializer', `exclpriorsearchcategory` int COMMENT 'from deserializer',
      `exclpriorsearchl1` int COMMENT 'from deserializer', `exclpriorsearchl2` int COMMENT 'from deserializer',
      `currentimpressionid` bigint COMMENT 'from deserializer', `sourceimpressionid` bigint COMMENT 'from deserializer',
      `exclpriorsearchsqr` string COMMENT 'from deserializer', `exclpriorsearchsort` string COMMENT 'from deserializer',
      `isduplicate` int COMMENT 'from deserializer', `transactiondate` string COMMENT 'from deserializer',
      `auctiontypecode` int COMMENT 'from deserializer', `isbin` int COMMENT 'from deserializer',
      `leafcategoryid` int COMMENT 'from deserializer', `itemsiteid` int COMMENT 'from deserializer',
      `bidquantity` int COMMENT 'from deserializer', `bidamtusd` double COMMENT 'from deserializer',
      `offerquantity` int COMMENT 'from deserializer', `offeramountusd` double COMMENT 'from deserializer',
      `offercreatedate` string COMMENT 'from deserializer', `buyersegment` string COMMENT 'from deserializer',
      `buyercountryid` int COMMENT 'from deserializer', `sellerid` bigint COMMENT 'from deserializer',
      `sellercountryid` int COMMENT 'from deserializer', `sellerstdlevel` string COMMENT 'from deserializer',
      `csssellerlevel` string COMMENT 'from deserializer', `experimentchannel` int COMMENT 'from deserializer')
      ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
      STORED AS
        INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
        OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
      LOCATION 'hdfs://cluster-NN-HostName:8020/user/dvasthimal/spark/successeventstaging/sojsuccessevents2'
      TBLPROPERTIES (
        'avro.schema.url'='hdfs://apollo-phx-nn.vip.ebay.com:8020/user/b_um/schema/successeventschema.avsc',
        'numFiles'='546', 'transient_lastDdlTime'='1427359138', 'COLUMN_STATS_ACCURATE'='true',
        'totalSize'='22595607864', 'numRows'='325974', 'rawDataSize'='0')""")

    sqlContext.sql("show tables")
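    // The statement below joins the Avro-backed temp table sojsuccessevents1
    // (alias a) with the Hive table dw_bid (alias b), maps dw_bid's snake_case
    // columns (transaction_Id, bdr_id, bid_date, qty_bid, ...) onto the
    // camelCase target schema, filters to bid types 1 and 9 in the date range,
    // and overwrites sojsuccessevents2_spark with the result.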
    val sojsuccessevents2 = sqlContext.sql(s"""insert overwrite table sojsuccessevents2_spark
      select guid, sessionKey, sessionStartDate, sojDataDate, seqNum, eventTimestamp, siteId, successEventType, sourceType, itemId,
             shopCartId, b.transaction_Id as transactionId, offerId, b.bdr_id as userId, priorPage1SeqNum, priorPage1PageId, exclWMSearchAttemptSeqNum, exclPriorSearchPageId,
             exclPriorSearchSeqNum, exclPriorSearchCategory, exclPriorSearchL1, exclPriorSearchL2, currentImpressionId, sourceImpressionId, exclPriorSearchSqr, exclPriorSearchSort,
             isDuplicate, b.bid_date as transactionDate, auctionTypeCode, isBin, leafCategoryId, itemSiteId, b.qty_bid as bidQuantity,
             b.bid_amt_unit_lstg_curncy * b.bid_exchng_rate as bidAmtUsd, offerQuantity, offerAmountUsd, offerCreateDate, buyerSegment, buyerCountryId, sellerId, sellerCountryId,
             sellerStdLevel, cssSellerLevel, a.experimentChannel
      from sojsuccessevents1 a join dw_bid b
        on a.itemId = b.item_id and a.transactionId = b.transaction_id
      where b.auct_end_dt >= '$startDate' AND b.bid_dt >= '$startDate'
        AND b.bid_type_code IN (1,9) AND b.bdr_id > 0 AND (b.bid_flags & 32) = 0
        and lower(a.successEventType) IN ('bid','bin')""")
  }
}

--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org
For additional commands, e-mail: issues-h...@spark.apache.org