[ https://issues.apache.org/jira/browse/HIVE-19360?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Jesus Camacho Rodriguez updated HIVE-19360: ------------------------------------------- Resolution: Fixed Fix Version/s: 3.2.0 4.0.0 Status: Resolved (was: Patch Available) > CBO: Add an "optimizedSQL" to QueryPlan object > ----------------------------------------------- > > Key: HIVE-19360 > URL: https://issues.apache.org/jira/browse/HIVE-19360 > Project: Hive > Issue Type: Improvement > Components: CBO, Diagnosability > Affects Versions: 3.1.0 > Reporter: Gopal V > Assignee: Gopal V > Priority: Major > Fix For: 4.0.0, 3.2.0 > > Attachments: HIVE-19360.1.patch, HIVE-19360.2.patch, > HIVE-19360.3.patch, HIVE-19360.4.patch, HIVE-19360.5.patch, > HIVE-19360.6.patch, HIVE-19360.7.patch, HIVE-19360.8.patch > > > Calcite RelNodes can be converted back into SQL (as the new JDBC storage > handler does), which allows Hive to print out the post CBO plan as a SQL > query instead of having to guess the join orders from the subsequent Tez plan. > The query generated might not be always valid SQL at this point, but is a > world ahead of DAG plans in readability. > Eg. tpc-ds Query4 CTEs gets expanded to > {code} > SELECT t16.$f3 customer_preferred_cust_flag > FROM > (SELECT t0.c_customer_id $f0, > SUM((t2.ws_ext_list_price - > t2.ws_ext_wholesale_cost - t2.ws_ext_discount_amt + t2.ws_ext_sales_price) / > CAST(2 AS DECIMAL(10, 0))) $f8 > FROM > (SELECT c_customer_sk, > c_customer_id, > c_first_name, > c_last_name, > c_preferred_cust_flag, > c_birth_country, > c_login, > c_email_address > FROM default.customer > WHERE c_customer_sk IS NOT NULL > AND c_customer_id IS NOT NULL) t0 > INNER JOIN ( > (SELECT ws_sold_date_sk, > ws_bill_customer_sk, > ws_ext_discount_amt, > ws_ext_sales_price, > ws_ext_wholesale_cost, > ws_ext_list_price > FROM default.web_sales > WHERE ws_bill_customer_sk IS NOT NULL > AND ws_sold_date_sk IS NOT NULL) t2 > INNER JOIN > (SELECT d_date_sk, > CAST(2002 AS INTEGER) d_year > FROM default.date_dim > WHERE d_year = 2002 > AND d_date_sk IS NOT NULL) t4 ON t2.ws_sold_date_sk = > t4.d_date_sk) ON t0.c_customer_sk = t2.ws_bill_customer_sk > GROUP BY t0.c_customer_id, > t0.c_first_name, > t0.c_last_name, > t0.c_preferred_cust_flag, > t0.c_birth_country, > t0.c_login, > t0.c_email_address) t7 > INNER JOIN ( > (SELECT t9.c_customer_id $f0, > t9.c_preferred_cust_flag $f3, > > SUM((t11.ss_ext_list_price - t11.ss_ext_wholesale_cost - > t11.ss_ext_discount_amt + t11.ss_ext_sales_price) / CAST(2 AS DECIMAL(10, > 0))) $f8 > FROM > (SELECT c_customer_sk, > c_customer_id, > c_first_name, > c_last_name, > c_preferred_cust_flag, > c_birth_country, > c_login, > c_email_address > FROM default.customer > WHERE c_customer_sk IS NOT NULL > AND c_customer_id IS NOT NULL) t9 > INNER JOIN ( > (SELECT ss_sold_date_sk, > ss_customer_sk, > ss_ext_discount_amt, > ss_ext_sales_price, > ss_ext_wholesale_cost, > ss_ext_list_price > FROM default.store_sales > WHERE ss_customer_sk IS NOT NULL > AND ss_sold_date_sk IS NOT NULL) t11 > INNER JOIN > (SELECT d_date_sk, > CAST(2002 AS INTEGER) d_year > FROM default.date_dim > WHERE d_year = 2002 > AND d_date_sk IS NOT NULL) t13 ON > t11.ss_sold_date_sk = t13.d_date_sk) ON t9.c_customer_sk = t11.ss_customer_sk > GROUP BY t9.c_customer_id, > t9.c_first_name, > t9.c_last_name, > t9.c_preferred_cust_flag, > t9.c_birth_country, > t9.c_login, > t9.c_email_address) t16 > INNER JOIN ( > (SELECT t18.c_customer_id $f0, > > SUM((t20.cs_ext_list_price - t20.cs_ext_wholesale_cost - > t20.cs_ext_discount_amt + t20.cs_ext_sales_price) / CAST(2 AS DECIMAL(10, > 0))) $f8 > FROM > (SELECT c_customer_sk, > c_customer_id, > c_first_name, > c_last_name, > c_preferred_cust_flag, > c_birth_country, > c_login, > c_email_address > FROM default.customer > WHERE c_customer_sk IS NOT NULL > AND c_customer_id IS NOT NULL) t18 > INNER JOIN ( > (SELECT cs_sold_date_sk, > cs_bill_customer_sk, > cs_ext_discount_amt, > cs_ext_sales_price, > cs_ext_wholesale_cost, > cs_ext_list_price > FROM default.catalog_sales > WHERE cs_bill_customer_sk IS NOT > NULL > AND cs_sold_date_sk IS NOT NULL) > t20 > INNER JOIN > (SELECT d_date_sk, > CAST(2002 AS INTEGER) d_year > FROM default.date_dim > WHERE d_year = 2002 > AND d_date_sk IS NOT NULL) t22 ON > t20.cs_sold_date_sk = t22.d_date_sk) ON t18.c_customer_sk = > t20.cs_bill_customer_sk > GROUP BY t18.c_customer_id, > t18.c_first_name, > t18.c_last_name, > t18.c_preferred_cust_flag, > t18.c_birth_country, > t18.c_login, > t18.c_email_address) t25 > INNER JOIN > (SELECT t27.c_customer_id $f0, > > SUM((t29.ss_ext_list_price - t29.ss_ext_wholesale_cost - > t29.ss_ext_discount_amt + t29.ss_ext_sales_price) / CAST(2 AS DECIMAL(10, > 0))) $f8 > FROM > (SELECT c_customer_sk, > c_customer_id, > c_first_name, > c_last_name, > c_preferred_cust_flag, > c_birth_country, > c_login, > c_email_address > FROM default.customer > WHERE c_customer_sk IS NOT NULL > AND c_customer_id IS NOT NULL) t27 > INNER JOIN ( > (SELECT ss_sold_date_sk, > ss_customer_sk, > ss_ext_discount_amt, > ss_ext_sales_price, > ss_ext_wholesale_cost, > ss_ext_list_price > FROM default.store_sales > WHERE ss_customer_sk IS NOT NULL > AND ss_sold_date_sk IS NOT NULL) > t29 > INNER JOIN > (SELECT d_date_sk, > CAST(2001 AS INTEGER) d_year > FROM default.date_dim > WHERE d_year = 2001 > AND d_date_sk IS NOT NULL) t31 ON > t29.ss_sold_date_sk = t31.d_date_sk) ON t27.c_customer_sk = t29.ss_customer_sk > GROUP BY t27.c_customer_id, > t27.c_first_name, > t27.c_last_name, > t27.c_preferred_cust_flag, > t27.c_birth_country, > t27.c_login, > t27.c_email_address > HAVING SUM((t29.ss_ext_list_price - > t29.ss_ext_wholesale_cost - t29.ss_ext_discount_amt + t29.ss_ext_sales_price) > / CAST(2 AS DECIMAL(10, 0))) > 0) t35 ON t25.$f0 = t35.$f0) ON t16.$f0 = > t35.$f0 > INNER JOIN > (SELECT t37.c_customer_id $f0, > SUM((t39.cs_ext_list_price - > t39.cs_ext_wholesale_cost - t39.cs_ext_discount_amt + t39.cs_ext_sales_price) > / CAST(2 AS DECIMAL(10, 0))) $f8 > FROM > (SELECT c_customer_sk, > c_customer_id, > c_first_name, > c_last_name, > c_preferred_cust_flag, > c_birth_country, > c_login, > c_email_address > FROM default.customer > WHERE c_customer_sk IS NOT NULL > AND c_customer_id IS NOT NULL) t37 > INNER JOIN ( > (SELECT cs_sold_date_sk, > cs_bill_customer_sk, > cs_ext_discount_amt, > cs_ext_sales_price, > cs_ext_wholesale_cost, > cs_ext_list_price > FROM default.catalog_sales > WHERE cs_bill_customer_sk IS NOT NULL > AND cs_sold_date_sk IS NOT NULL) t39 > INNER JOIN > (SELECT d_date_sk, > CAST(2001 AS INTEGER) d_year > FROM default.date_dim > WHERE d_year = 2001 > AND d_date_sk IS NOT NULL) t41 ON > t39.cs_sold_date_sk = t41.d_date_sk) ON t37.c_customer_sk = > t39.cs_bill_customer_sk > GROUP BY t37.c_customer_id, > t37.c_first_name, > t37.c_last_name, > t37.c_preferred_cust_flag, > t37.c_birth_country, > t37.c_login, > t37.c_email_address > HAVING SUM((t39.cs_ext_list_price - t39.cs_ext_wholesale_cost > - t39.cs_ext_discount_amt + t39.cs_ext_sales_price) / CAST(2 AS DECIMAL(10, > 0))) > 0) t45 ON t25.$f8 / t45.$f8 > t16.$f8 / t35.$f8 > AND t35.$f0 = t45.$f0) ON t7.$f0 = t35.$f0 > INNER JOIN > (SELECT t47.c_customer_id $f0, > SUM((t49.ws_ext_list_price - > t49.ws_ext_wholesale_cost - t49.ws_ext_discount_amt + t49.ws_ext_sales_price) > / CAST(2 AS DECIMAL(10, 0))) $f8 > FROM > (SELECT c_customer_sk, > c_customer_id, > c_first_name, > c_last_name, > c_preferred_cust_flag, > c_birth_country, > c_login, > c_email_address > FROM default.customer > WHERE c_customer_sk IS NOT NULL > AND c_customer_id IS NOT NULL) t47 > INNER JOIN ( > (SELECT ws_sold_date_sk, > ws_bill_customer_sk, > ws_ext_discount_amt, > ws_ext_sales_price, > ws_ext_wholesale_cost, > ws_ext_list_price > FROM default.web_sales > WHERE ws_bill_customer_sk IS NOT NULL > AND ws_sold_date_sk IS NOT NULL) t49 > INNER JOIN > (SELECT d_date_sk, > CAST(2001 AS INTEGER) d_year > FROM default.date_dim > WHERE d_year = 2001 > AND d_date_sk IS NOT NULL) t51 ON t49.ws_sold_date_sk = > t51.d_date_sk) ON t47.c_customer_sk = t49.ws_bill_customer_sk > GROUP BY t47.c_customer_id, > t47.c_first_name, > t47.c_last_name, > t47.c_preferred_cust_flag, > t47.c_birth_country, > t47.c_login, > t47.c_email_address > HAVING SUM((t49.ws_ext_list_price - t49.ws_ext_wholesale_cost - > t49.ws_ext_discount_amt + t49.ws_ext_sales_price) / CAST(2 AS DECIMAL(10, > 0))) > 0) t55 ON t25.$f8 / t45.$f8 > t7.$f8 / t55.$f8 > AND t35.$f0 = t55.$f0 > ORDER BY t16.$f3 IS NULL DESC, t16.$f3 > LIMIT 100 > {code} -- This message was sent by Atlassian JIRA (v7.6.3#76005)