Hi, I am trying to use Spark SQL to join tables from different data sources - Hive and Teradata. I can access the tables individually, but when I run a join query I get an AnalysisException.
The same query runs fine when all the tables exist in Teradata. Any help would be appreciated.

I am running the following commands in spark-shell (Spark version 1.3.0):

val hc = new org.apache.spark.sql.hive.HiveContext(sc)

val hiveStoreSalesDF = hc.load("jdbc", Map(
  "url" -> "jdbc:hive2://192.168.145.39:10000/benchqads_teradata?user=hive&password=***",
  "dbtable" -> "store_sales"))
hiveStoreSalesDF.registerTempTable("store_sales")

val dateDimDF = hc.load("jdbc", Map(
  "url" -> "jdbc:teradata://192.168.145.58/DBS_PORT=1025,DATABASE=BENCHQADS,LOB_SUPPORT=OFF,USER= BENCHQADS,PASSWORD=****",
  "dbtable" -> "date_dim"))
dateDimDF.registerTempTable("date_dim")

val itemDF = hc.load("jdbc", Map(
  "url" -> "jdbc:teradata://192.168.145.58/DBS_PORT=1025,DATABASE=BENCHQADS,LOB_SUPPORT=OFF,USER= BENCHQADS,PASSWORD=****",
  "dbtable" -> "item"))
itemDF.registerTempTable("item")

val report = hc.sql("select d_year, item.i_brand_id brand_id, item.i_brand brand from date_dim, store_sales, item where date_dim.d_date_sk = store_sales.ss_sold_date_sk and store_sales.ss_item_sk = item.i_item_sk and item.i_manufact_id = 922 and date_dim.d_moy=12 group by d_year ,item.i_brand ,item.i_brand_id order by d_year desc,brand_id")

The last statement fails with:

org.apache.spark.sql.AnalysisException: cannot resolve 'd_year' given input columns store_sales.ss_item_sk, d_following_holiday, store_sales.ss_sales_price, i_class_id, d_date_id, d_fy_year, i_wholesale_cost, store_sales.ss_store_sk, store_sales.ss_net_paid, d_day_name, d_week_seq, d_quarter_name, d_current_quarter, i_rec_end_date, i_class, store_sales.ss_sold_date_sk, d_fy_quarter_seq, store_sales.ss_ext_tax, d_moy, d_first_dom, i_current_price, i_brand, i_brand_id, d_qoy, store_sales.ss_list_price, i_container, store_sales.ss_ext_sales_price, store_sales.ss_wholesale_cost, d_dom, d_current_year, d_current_week, i_item_desc, store_sales.ss_ext_wholesale_cost, store_sales.ss_cdemo_sk, i_rec_start_date, d_last_dom, d_dow, store_sales.ss_promo_sk, i_category_id, i_color, i_units, d_date_sk, store_sales.ss_ticket_number, i_item_sk, store_sales.ss_coupon_amt, store_sales.ss_quantity, d_same_day_lq, store_sales.ss_net_paid_inc_tax, d_date, d_weekend, d_current_month, store_sales.ss_ext_discount_amt, d_current_day, store_sales.ss_hdemo_sk, d_year, i_category, d_month_seq, i_manufact_id, store_sales.ss_net_profit, store_sales.ss_addr_sk, store_sales.ss_customer_sk, d_same_day_ly, d_quarter_seq, d_holiday, i_size, store_sales.ss_ext_list_price, i_formulation, i_manager_id, i_manufact, d_fy_week_seq, i_product_name, store_sales.ss_sold_time_sk, i_item_id; line 1 pos 253

Regards,
Ishwardeep
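P.S. For completeness, here is the same query with d_year fully qualified as date_dim.d_year in the select, group by, and order by clauses. This is only a sketch (the reportQualified name is mine), and I have not verified whether the analyzer resolves it any differently when the tables come from different data sources:

// Same query as above, but with d_year qualified by its table name throughout
val reportQualified = hc.sql("select date_dim.d_year, item.i_brand_id brand_id, item.i_brand brand from date_dim, store_sales, item where date_dim.d_date_sk = store_sales.ss_sold_date_sk and store_sales.ss_item_sk = item.i_item_sk and item.i_manufact_id = 922 and date_dim.d_moy=12 group by date_dim.d_year, item.i_brand, item.i_brand_id order by date_dim.d_year desc, brand_id")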