http://git-wip-us.apache.org/repos/asf/carbondata/blob/0ef7e55c/datamap/mv/plan/src/main/scala/org/apache/carbondata/mv/dsl/package.scala ---------------------------------------------------------------------- diff --git a/datamap/mv/plan/src/main/scala/org/apache/carbondata/mv/dsl/package.scala b/datamap/mv/plan/src/main/scala/org/apache/carbondata/mv/dsl/package.scala index 20b5e8a..241d6a8 100644 --- a/datamap/mv/plan/src/main/scala/org/apache/carbondata/mv/dsl/package.scala +++ b/datamap/mv/plan/src/main/scala/org/apache/carbondata/mv/dsl/package.scala @@ -35,7 +35,7 @@ import org.apache.carbondata.mv.plans.util._ */ package object dsl { - // object plans { + object Plans { implicit class DslModularPlan(val modularPlan: ModularPlan) { def select(outputExprs: NamedExpression*) @@ -96,6 +96,6 @@ package object dsl { def optimize: LogicalPlan = BirdcageOptimizer.execute(logicalPlan) } - // } + } }
http://git-wip-us.apache.org/repos/asf/carbondata/blob/0ef7e55c/datamap/mv/plan/src/main/scala/org/apache/carbondata/mv/plans/util/LogicalPlanSignatureGenerator.scala ---------------------------------------------------------------------- diff --git a/datamap/mv/plan/src/main/scala/org/apache/carbondata/mv/plans/util/LogicalPlanSignatureGenerator.scala b/datamap/mv/plan/src/main/scala/org/apache/carbondata/mv/plans/util/LogicalPlanSignatureGenerator.scala index 0c5661e..2aff5c0 100644 --- a/datamap/mv/plan/src/main/scala/org/apache/carbondata/mv/plans/util/LogicalPlanSignatureGenerator.scala +++ b/datamap/mv/plan/src/main/scala/org/apache/carbondata/mv/plans/util/LogicalPlanSignatureGenerator.scala @@ -30,8 +30,7 @@ object CheckSPJG { case a: Aggregate => a.child.collect { case Join(_, _, _, _) | Project(_, _) | Filter(_, _) | -// CatalogRelation(_, _, _) | - LogicalRelation(_, _, _) | LocalRelation(_, _) => true + HiveTableRelation(_, _, _) | LogicalRelation(_, _, _) | LocalRelation(_, _) => true case _ => false }.forall(identity) case _ => false @@ -59,10 +58,10 @@ object LogicalPlanRule extends SignatureRule[LogicalPlan] { case LogicalRelation(_, _, _) => // TODO: implement this (link to BaseRelation) None -// case CatalogRelation(tableMeta, _, _) => -// Some(Signature(false, -// Set(Seq(tableMeta.database, tableMeta.identifier.table).mkString(".")))) - case l: LocalRelation => + case HiveTableRelation(tableMeta, _, _) => + Some(Signature(false, + Set(Seq(tableMeta.database, tableMeta.identifier.table).mkString(".")))) + case l : LocalRelation => // LocalRelation is for unit test cases Some(Signature(groupby = false, Set(l.toString()))) case Filter(_, _) => http://git-wip-us.apache.org/repos/asf/carbondata/blob/0ef7e55c/datamap/mv/plan/src/main/scala/org/apache/carbondata/mv/plans/util/SQLBuilder.scala ---------------------------------------------------------------------- diff --git a/datamap/mv/plan/src/main/scala/org/apache/carbondata/mv/plans/util/SQLBuilder.scala b/datamap/mv/plan/src/main/scala/org/apache/carbondata/mv/plans/util/SQLBuilder.scala index 4bc8b97..b6e62eb 100644 --- a/datamap/mv/plan/src/main/scala/org/apache/carbondata/mv/plans/util/SQLBuilder.scala +++ b/datamap/mv/plan/src/main/scala/org/apache/carbondata/mv/plans/util/SQLBuilder.scala @@ -21,7 +21,7 @@ import java.util.concurrent.atomic.AtomicLong import scala.collection.immutable -import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, AttributeMap, AttributeReference, AttributeSet, Expression, NamedExpression} +import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, AttributeMap, AttributeReference, AttributeSet, Cast, Expression, NamedExpression} import org.apache.spark.sql.catalyst.rules.{Rule, RuleExecutor} import org.apache.carbondata.mv.expressions.modular._ @@ -71,6 +71,10 @@ class SQLBuilder private( CleanupQualifier, // Insert sub queries on top of operators that need to appear after FROM clause. AddSubquery + // Removes [[Cast Casts]] that are unnecessary when converting back to SQL + // Comment out for now, will add later by converting AttributMap to Map in SQLBuildDSL + // .scala + // RemoveCasts ) ) } @@ -217,6 +221,14 @@ class SQLBuilder private( } } + object RemoveCasts extends Rule[ModularPlan] { + def apply(tree: ModularPlan): ModularPlan = { + tree transformAllExpressions { + case Cast(e, dataType, _) => e + } + } + } + } object SQLBuilder { http://git-wip-us.apache.org/repos/asf/carbondata/blob/0ef7e55c/datamap/mv/plan/src/main/scala/org/apache/carbondata/mv/testutil/Tpcds_1_4_Tables.scala ---------------------------------------------------------------------- diff --git a/datamap/mv/plan/src/main/scala/org/apache/carbondata/mv/testutil/Tpcds_1_4_Tables.scala b/datamap/mv/plan/src/main/scala/org/apache/carbondata/mv/testutil/Tpcds_1_4_Tables.scala index 97772c7..175b319 100644 --- a/datamap/mv/plan/src/main/scala/org/apache/carbondata/mv/testutil/Tpcds_1_4_Tables.scala +++ b/datamap/mv/plan/src/main/scala/org/apache/carbondata/mv/testutil/Tpcds_1_4_Tables.scala @@ -20,7 +20,7 @@ package org.apache.carbondata.mv.testutil object Tpcds_1_4_Tables { val tpcds1_4Tables = Seq[String]( s""" - |CREATE TABLE catalog_sales ( + |CREATE TABLE IF NOT EXISTS catalog_sales ( | `cs_sold_date_sk` int, | `cs_sold_time_sk` int, | `cs_ship_date_sk` int, @@ -56,10 +56,11 @@ object Tpcds_1_4_Tables { | `cs_net_paid_inc_ship_tax` decimal(7,2), | `cs_net_profit` decimal(7,2) |) - |STORED BY 'org.apache.carbondata.format' + |ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' + |STORED AS TEXTFILE """.stripMargin.trim, s""" - |CREATE TABLE catalog_returns ( + |CREATE TABLE IF NOT EXISTS catalog_returns ( | `cr_returned_date_sk` int, | `cr_returned_time_sk` int, | `cr_item_sk` int, @@ -88,19 +89,21 @@ object Tpcds_1_4_Tables { | `cr_store_credit` decimal(7,2), | `cr_net_loss` decimal(7,2) |) - |STORED BY 'org.apache.carbondata.format' + |ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' + |STORED AS TEXTFILE """.stripMargin.trim, s""" - |CREATE TABLE inventory ( + |CREATE TABLE IF NOT EXISTS inventory ( | `inv_date_sk` int, | `inv_item_sk` int, | `inv_warehouse_sk` int, | `inv_quantity_on_hand` int |) - |STORED BY 'org.apache.carbondata.format' + |ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' + |STORED AS TEXTFILE """.stripMargin.trim, s""" - |CREATE TABLE store_sales ( + |CREATE TABLE IF NOT EXISTS store_sales ( | `ss_sold_date_sk` int, | `ss_sold_time_sk` int, | `ss_item_sk` int, @@ -125,10 +128,11 @@ object Tpcds_1_4_Tables { | `ss_net_paid_inc_tax` decimal(7,2), | `ss_net_profit` decimal(7,2) |) - |STORED BY 'org.apache.carbondata.format' + |ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' + |STORED AS TEXTFILE """.stripMargin.trim, s""" - |CREATE TABLE store_returns ( + |CREATE TABLE IF NOT EXISTS store_returns ( | `sr_returned_date_sk` int, | `sr_return_time_sk` int, | `sr_item_sk` int, @@ -150,10 +154,11 @@ object Tpcds_1_4_Tables { | `sr_store_credit` decimal(7,2), | `sr_net_loss` decimal(7,2) |) - |STORED BY 'org.apache.carbondata.format' + |ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' + |STORED AS TEXTFILE """.stripMargin.trim, s""" - |CREATE TABLE web_sales ( + |CREATE TABLE IF NOT EXISTS web_sales ( | `ws_sold_date_sk` int, | `ws_sold_time_sk` int, | `ws_ship_date_sk` int, @@ -189,10 +194,11 @@ object Tpcds_1_4_Tables { | `ws_net_paid_inc_ship_tax` decimal(7,2), | `ws_net_profit` decimal(7,2) |) - |STORED BY 'org.apache.carbondata.format' + |ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' + |STORED AS TEXTFILE """.stripMargin.trim, s""" - |CREATE TABLE web_returns ( + |CREATE TABLE IF NOT EXISTS web_returns ( | `wr_returned_date_sk` int, | `wr_returned_time_sk` int, | `wr_item_sk` int, @@ -218,10 +224,11 @@ object Tpcds_1_4_Tables { | `wr_account_credit` decimal(7,2), | `wr_net_loss` decimal(7,2) |) - |STORED BY 'org.apache.carbondata.format' + |ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' + |STORED AS TEXTFILE """.stripMargin.trim, s""" - |CREATE TABLE call_center ( + |CREATE TABLE IF NOT EXISTS call_center ( | `cc_call_center_sk` int, | `cc_call_center_id` string, | `cc_rec_start_date` date, @@ -254,10 +261,11 @@ object Tpcds_1_4_Tables { | `cc_gmt_offset` decimal(5,2), | `cc_tax_percentage` decimal(5,2) |) - |STORED BY 'org.apache.carbondata.format' + |ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' + |STORED AS TEXTFILE """.stripMargin.trim, s""" - |CREATE TABLE catalog_page ( + |CREATE TABLE IF NOT EXISTS catalog_page ( | `cp_catalog_page_sk` int, | `cp_catalog_page_id` string, | `cp_start_date_sk` int, @@ -268,10 +276,11 @@ object Tpcds_1_4_Tables { | `cp_description` string, | `cp_type` string |) - |STORED BY 'org.apache.carbondata.format' + |ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' + |STORED AS TEXTFILE """.stripMargin.trim, s""" - |CREATE TABLE customer ( + |CREATE TABLE IF NOT EXISTS customer ( | `c_customer_sk` int, | `c_customer_id` string, | `c_current_cdemo_sk` int, @@ -291,10 +300,11 @@ object Tpcds_1_4_Tables { | `c_email_address` string, | `c_last_review_date` string |) - |STORED BY 'org.apache.carbondata.format' + |ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' + |STORED AS TEXTFILE """.stripMargin.trim, s""" - |CREATE TABLE customer_address ( + |CREATE TABLE IF NOT EXISTS customer_address ( | `ca_address_sk` int, | `ca_address_id` string, | `ca_street_number` string, @@ -309,10 +319,11 @@ object Tpcds_1_4_Tables { | `ca_gmt_offset` decimal(5,2), | `ca_location_type` string |) - |STORED BY 'org.apache.carbondata.format' + |ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' + |STORED AS TEXTFILE """.stripMargin.trim, s""" - |CREATE TABLE customer_demographics ( + |CREATE TABLE IF NOT EXISTS customer_demographics ( | `cd_demo_sk` int, | `cd_gender` string, | `cd_marital_status` string, @@ -323,10 +334,11 @@ object Tpcds_1_4_Tables { | `cd_dep_employed_count` int, | `cd_dep_college_count` int |) - |STORED BY 'org.apache.carbondata.format' + |ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' + |STORED AS TEXTFILE """.stripMargin.trim, s""" - |CREATE TABLE date_dim ( + |CREATE TABLE IF NOT EXISTS date_dim ( | `d_date_sk` int, | `d_date_id` string, | `d_date` date, @@ -356,28 +368,31 @@ object Tpcds_1_4_Tables { | `d_current_quarter` string, | `d_current_year` string |) - |STORED BY 'org.apache.carbondata.format' + |ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' + |STORED AS TEXTFILE """.stripMargin.trim, s""" - |CREATE TABLE household_demographics ( + |CREATE TABLE IF NOT EXISTS household_demographics ( | `hd_demo_sk` int, | `hd_income_band_sk` int, | `hd_buy_potential` string, | `hd_dep_count` int, | `hd_vehicle_count` int |) - |STORED BY 'org.apache.carbondata.format' + |ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' + |STORED AS TEXTFILE """.stripMargin.trim, s""" - |CREATE TABLE income_band ( + |CREATE TABLE IF NOT EXISTS income_band ( | `ib_income_band_sk` int, | `ib_lower_bound` int, | `ib_upper_bound` int |) - |STORED BY 'org.apache.carbondata.format' + |ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' + |STORED AS TEXTFILE """.stripMargin.trim, s""" - |CREATE TABLE item ( + |CREATE TABLE IF NOT EXISTS item ( | `i_item_sk` int, | `i_item_id` string, | `i_rec_start_date` date, @@ -401,10 +416,11 @@ object Tpcds_1_4_Tables { | `i_manager_id` int, | `i_product_name` string |) - |STORED BY 'org.apache.carbondata.format' + |ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' + |STORED AS TEXTFILE """.stripMargin.trim, s""" - |CREATE TABLE promotion ( + |CREATE TABLE IF NOT EXISTS promotion ( | `p_promo_sk` int, | `p_promo_id` string, | `p_start_date_sk` int, @@ -425,18 +441,20 @@ object Tpcds_1_4_Tables { | `p_purpose` string, | `p_discount_active` string |) - |STORED BY 'org.apache.carbondata.format' + |ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' + |STORED AS TEXTFILE """.stripMargin.trim, s""" - |CREATE TABLE reason ( + |CREATE TABLE IF NOT EXISTS reason ( | `r_reason_sk` int, | `r_reason_id` string, | `r_reason_desc` string |) - |STORED BY 'org.apache.carbondata.format' + |ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' + |STORED AS TEXTFILE """.stripMargin.trim, s""" - |CREATE TABLE ship_mode ( + |CREATE TABLE IF NOT EXISTS ship_mode ( | `sm_ship_mode_sk` int, | `sm_ship_mode_id` string, | `sm_type` string, @@ -444,10 +462,11 @@ object Tpcds_1_4_Tables { | `sm_carrier` string, | `sm_contract` string |) - |STORED BY 'org.apache.carbondata.format' + |ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' + |STORED AS TEXTFILE """.stripMargin.trim, s""" - |CREATE TABLE store ( + |CREATE TABLE IF NOT EXISTS store ( | `s_store_sk` int, | `s_store_id` string, | `s_rec_start_date` date, @@ -478,10 +497,11 @@ object Tpcds_1_4_Tables { | `s_gmt_offset` decimal(5,2), | `s_tax_precentage` decimal(5,2) |) - |STORED BY 'org.apache.carbondata.format' + |ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' + |STORED AS TEXTFILE """.stripMargin.trim, s""" - |CREATE TABLE time_dim ( + |CREATE TABLE IF NOT EXISTS time_dim ( | `t_time_sk` int, | `t_time_id` string, | `t_time` int, @@ -493,10 +513,11 @@ object Tpcds_1_4_Tables { | `t_sub_shift` string, | `t_meal_time` string |) - |STORED BY 'org.apache.carbondata.format' + |ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' + |STORED AS TEXTFILE """.stripMargin.trim, s""" - |CREATE TABLE warehouse ( + |CREATE TABLE IF NOT EXISTS warehouse ( | `w_warehouse_sk` int, | `w_warehouse_id` string, | `w_warehouse_name` string, @@ -512,10 +533,11 @@ object Tpcds_1_4_Tables { | `w_country` string, | `w_gmt_offset` decimal(5,2) |) - |STORED BY 'org.apache.carbondata.format' + |ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' + |STORED AS TEXTFILE """.stripMargin.trim, s""" - |CREATE TABLE web_page ( + |CREATE TABLE IF NOT EXISTS web_page ( | `wp_web_page_sk` int, | `wp_web_page_id` string, | `wp_rec_start_date` date, @@ -531,10 +553,11 @@ object Tpcds_1_4_Tables { | `wp_image_count` int, | `wp_max_ad_count` int |) - |STORED BY 'org.apache.carbondata.format' + |ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' + |STORED AS TEXTFILE """.stripMargin.trim, s""" - |CREATE TABLE web_site ( + |CREATE TABLE IF NOT EXISTS web_site ( | `web_site_sk` int, | `web_site_id` string, | `web_rec_start_date` date, @@ -562,10 +585,11 @@ object Tpcds_1_4_Tables { | `web_gmt_offset` decimal(5,2), | `web_tax_percentage` decimal(5,2) |) - |STORED BY 'org.apache.carbondata.format' + |ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' + |STORED AS TEXTFILE """.stripMargin.trim, s""" - |CREATE TABLE sdr_dyn_seq_custer_iot_all_hour_60min + |CREATE TABLE IF NOT EXISTS sdr_dyn_seq_custer_iot_all_hour_60min |( | `dim_1` String, | `dim_51` String, @@ -770,10 +794,11 @@ object Tpcds_1_4_Tables { | `counter_100` double, | `batchno` double |) - |STORED BY 'org.apache.carbondata.format' + |ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' + |STORED AS TEXTFILE """.stripMargin.trim, s""" - |CREATE TABLE dim_apn_iot + |CREATE TABLE IF NOT EXISTS dim_apn_iot |( | `city_ascription` String, | `industry` String, @@ -782,10 +807,11 @@ object Tpcds_1_4_Tables { | `customer_name` String, | `id` bigint |) - |STORED BY 'org.apache.carbondata.format' + |ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' + |STORED AS TEXTFILE """.stripMargin.trim, s""" - |CREATE TABLE tradeflow_all ( + |CREATE TABLE IF NOT EXISTS tradeflow_all ( | m_month smallint, | hs_code string , | country smallint, @@ -795,17 +821,17 @@ object Tpcds_1_4_Tables { | b_country smallint, | imex smallint, | y_year smallint) - |STORED BY 'org.apache.carbondata.format' + |STORED AS parquet """.stripMargin.trim, s""" - |CREATE TABLE country ( + |CREATE TABLE IF NOT EXISTS country ( | countryid smallint , | country_en string , | country_cn string ) - |STORED BY 'org.apache.carbondata.format' + |STORED AS parquet """.stripMargin.trim, s""" - |CREATE TABLE updatetime ( + |CREATE TABLE IF NOT EXISTS updatetime ( | countryid smallint , | imex smallint , | hs_len smallint , @@ -813,7 +839,7 @@ object Tpcds_1_4_Tables { | startdate string , | newdate string , | minnewdate string ) - |STORED BY 'org.apache.carbondata.format' + |STORED AS parquet """.stripMargin.trim ) } http://git-wip-us.apache.org/repos/asf/carbondata/blob/0ef7e55c/datamap/mv/plan/src/test/scala/org/apache/carbondata/mv/TestSQLBatch.scala ---------------------------------------------------------------------- diff --git a/datamap/mv/plan/src/test/scala/org/apache/carbondata/mv/TestSQLBatch.scala b/datamap/mv/plan/src/test/scala/org/apache/carbondata/mv/TestSQLBatch.scala deleted file mode 100644 index 3806dac..0000000 --- a/datamap/mv/plan/src/test/scala/org/apache/carbondata/mv/TestSQLBatch.scala +++ /dev/null @@ -1,584 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.carbondata.mv - -object TestSQLBatch { - - val testSQLBatch = Seq[String]( - s""" - |SELECT f1.A,COUNT(*) AS B - |FROM - | fact f1 - | JOIN dim d1 ON (f1.K = d1.K) - |WHERE f1.E IS NULL AND (f1.C > d1.E OR d1.E = 3) - |GROUP BY f1.A - """.stripMargin.trim, - s""" - |SELECT f1.A,COUNT(*) AS B - |FROM - | fact f1 - | JOIN dim d1 ON (f1.K = d1.K) - | JOIN dim1 d2 ON (f1.K = d2.K AND d2.G > 0) - |WHERE f1.E IS NULL AND f1.C > d1.E - |GROUP BY f1.A - """.stripMargin.trim, - s""" - |SELECT substr(item.i_item_desc,1,30) itemdesc, item.i_item_sk item_sk, date_dim.d_date solddate, count(*) cnt - |FROM date_dim, store_sales, item - |WHERE date_dim.d_date_sk = store_sales.ss_sold_date_sk - | AND store_sales.ss_item_sk = item.i_item_sk - | AND date_dim.d_year in (2000, 2000+1, 2000+2, 2000+3) - |GROUP BY substr(item.i_item_desc,1,30), item.i_item_sk,date_dim.d_date - """.stripMargin.trim, - s""" - |SELECT item.i_item_desc, item.i_category, item.i_class, item.i_current_price, - | SUM(store_sales.ss_ext_sales_price) as itemrevenue, - | SUM(store_sales.ss_ext_sales_price)*100/sum(sum(store_sales.ss_ext_sales_price)) over (partition by item.i_class) as revenueratio - |FROM date_dim, store_sales, item - |WHERE date_dim.d_date_sk = store_sales.ss_sold_date_sk - | AND store_sales.ss_item_sk = item.i_item_sk - | AND item.i_category in ('Sport', 'Books', 'Home') - | AND date_dim.d_date between cast('1999-02-22' as date) AND (cast('1999-02-22' as date) + interval 30 days) - |GROUP BY item.i_item_id, item.i_item_desc, item.i_category, item.i_class, item.i_current_price - """.stripMargin.trim, - s""" - |SELECT 'store' channel, store_sales.ss_store_sk col_name, date_dim.d_year, date_dim.d_qoy, - | item.i_category, SUM(store_sales.ss_ext_sales_price) ext_sales_price - |FROM date_dim, store_sales, item - |WHERE store_sales.ss_store_sk IS NULL - | AND store_sales.ss_sold_date_sk = date_dim.d_date_sk - | AND store_sales.ss_item_sk = item.i_item_sk - |GROUP BY channel, store_sales.ss_store_sk, date_dim.d_year, date_dim.d_qoy, item.i_category - """.stripMargin.trim, - s""" - |SELECT 'store' channel, store_sales.ss_store_sk col_name, date_dim.d_year, date_dim.d_qoy, - | item.i_category, SUM(store_sales.ss_ext_sales_price) ext_sales_price - |FROM date_dim, store_sales, item - |WHERE store_sales.ss_store_sk IS NULL - | AND store_sales.ss_sold_date_sk = date_dim.d_date_sk - | AND store_sales.ss_item_sk = item.i_item_sk - |GROUP BY store_sales.ss_store_sk, date_dim.d_year, date_dim.d_qoy, item.i_category - """.stripMargin.trim, - s""" - |SELECT item.i_brand_id brand_id, item.i_brand brand, SUM(ss_ext_sales_price) ext_price - |FROM date_dim, store_sales, item - |WHERE store_sales.ss_sold_date_sk = date_dim.d_date_sk - | AND store_sales.ss_item_sk = item.i_item_sk - | AND item.i_manager_id = 28 - | AND date_dim.d_year = 1999 - | AND date_dim.d_moy = 11 - |GROUP BY item.i_brand_id, item.i_brand - """.stripMargin.trim, - s""" - |SELECT item.i_brand_id brand_id, item.i_brand_id brand, SUM(ss_ext_sales_price) ext_price - |FROM date_dim, store_sales, item - |WHERE store_sales.ss_sold_date_sk = date_dim.d_date_sk - | AND store_sales.ss_item_sk = item.i_item_sk - | AND item.i_manager_id = 28 - | AND date_dim.d_year = 1999 - | AND date_dim.d_moy = 11 - |GROUP BY item.i_brand_id, item.i_class_id,item.i_category_id - """.stripMargin.trim, - s""" - |SELECT 'store' channel, item.i_brand_id, item.i_class_id, item.i_category_id, - | SUM(store_sales.ss_quantity*store_sales.ss_list_price) sales, count(*) number_sales - |FROM date_dim, store_sales, item - |WHERE date_dim.d_date_sk = store_sales.ss_sold_date_sk - | AND store_sales.ss_item_sk = item.i_item_sk - | AND date_dim.d_year = 1999 + 2 - | AND date_dim.d_moy = 11 - |GROUP BY item.i_brand_id, item.i_class_id,item.i_category_id - """.stripMargin.trim, - s""" - |SELECT substr(item.i_item_desc,1,30) itemdesc, item.i_item_sk item_sk, dt.d_date solddate, count(*) cnt - |FROM date_dim dt, store_sales, item - |WHERE dt.d_date_sk = store_sales.ss_sold_date_sk - | AND store_sales.ss_item_sk = item.i_item_sk - | AND dt.d_year in (2000, 2000+1, 2000+2, 2000+3) - |GROUP BY substr(item.i_item_desc,1,30), item.i_item_sk,dt.d_date - """.stripMargin.trim, - s""" - |SELECT fact.B - |FROM - | fact - |UNION ALL - |SELECT fact.B - |FROM - | fact - |UNION ALL - |SELECT fact.B - |FROM - | fact - """.stripMargin.trim, - s""" - |SELECT fact.A - |FROM - | fact - | JOIN dim ON (fact.K = dim.K) - | JOIN dim1 ON (fact.K = dim1.K AND dim1.G > 0) - |WHERE fact.C > dim.E AND dim.E IS NULL - |UNION ALL - |SELECT fact.A - |FROM - | fact - | JOIN dim ON (fact.K = dim.K) - | JOIN dim1 ON (fact.K = dim1.K AND dim1.G > 0) - |WHERE fact.C > dim.E AND dim.E IS NULL - |UNION ALL - |SELECT fact.B - |FROM - | fact - """.stripMargin.trim, - s""" - |SELECT fact.A - |FROM - | fact - | JOIN dim ON (fact.K = dim.K) - | JOIN dim1 ON (fact.K = dim1.K AND dim1.G > 0) - |WHERE fact.C > dim.E AND dim.E IS NULL - |UNION ALL - |SELECT fact.A - |FROM - | fact - | JOIN dim ON (fact.K = dim.K) - | JOIN dim1 ON (fact.K = dim1.K AND dim1.G > 0) - |WHERE fact.C > dim.E AND dim.E IS NULL - """.stripMargin.trim, - s""" - |SELECT fact.A - |FROM - | fact - | JOIN dim ON (fact.K = dim.K) - | JOIN dim1 ON (fact.K = dim1.K AND dim1.G > 0) - |WHERE fact.C > dim.E AND dim.E IS NULL - |UNION ALL - |SELECT fact.B - |FROM - | fact - """.stripMargin.trim, - s""" - |SELECT fact.A - |FROM - | fact - | JOIN dim ON (fact.K = dim.K) - | JOIN dim1 ON (fact.K = dim1.K AND dim1.G > 0) - |WHERE fact.C > dim.E AND dim.E IS NULL - |UNION ALL - |SELECT fact.A - |FROM - | fact - """.stripMargin.trim, - s""" - |SELECT f1.A,f1.B,COUNT(*) AS A - |FROM - | fact f1 - | JOIN dim d1 ON (f1.K = d1.K) - |GROUP BY f1.A,f1.B - """.stripMargin.trim, - s""" - |SELECT f1.A,f1.B,COUNT(*) AS A - |FROM - | fact f1 - | JOIN dim d1 ON (f1.K = d1.K) - |WHERE f1.E IS NULL AND f1.C > d1.E AND f1.B = 2 - |GROUP BY f1.A,f1.B - """.stripMargin.trim, - s""" - |SELECT f1.A,f1.B,COUNT(*) AS A - |FROM - | fact f1 - | JOIN dim d1 ON (f1.K = d1.K) - |WHERE f1.E IS NULL AND f1.C > d1.E AND d1.E = 3 - |GROUP BY f1.A,f1.B - """.stripMargin.trim, - s""" - |SELECT f1.A,f1.B,COUNT(*) AS A - |FROM - | fact f1 - | JOIN dim d1 ON (f1.K = d1.K) - |WHERE f1.E IS NULL AND f1.C > d1.E - |GROUP BY f1.A,f1.B - """.stripMargin.trim, - s""" - |SELECT f1.A,f1.B,COUNT(*) AS A - |FROM - | fact f1 - | JOIN dim d1 ON (f1.K = d1.K) - | JOIN dim d2 ON (f1.K = d2.K AND d2.E > 0) - |WHERE f1.E IS NULL AND f1.C > d1.E - |GROUP BY f1.A,f1.B - """.stripMargin.trim, - s""" - |SELECT fact.A,fact.B,COUNT(*) AS A - |FROM - | fact - | JOIN dim d1 ON (fact.K = d1.K) - | JOIN dim d2 ON (fact.K = d2.K AND d2.E > 0) - |WHERE fact.E IS NULL AND fact.C > d1.E - |GROUP BY fact.A,fact.B - """.stripMargin.trim, - s""" - |SELECT fact.A - |FROM - | fact - | JOIN dim ON (fact.K = dim.K) - | JOIN dim1 ON (fact.K = dim1.K) - |WHERE fact.C > dim.E AND (dim.E IS NULL OR dim1.G IS NULL) - """.stripMargin.trim, - s""" - |SELECT fact.A - |FROM - | fact - | JOIN dim ON (fact.K = dim.K) - | JOIN dim1 ON (fact.K = dim1.K) - |WHERE fact.C > dim.E OR dim1.G IS NULL - """.stripMargin.trim, - s""" - |SELECT fact.A - |FROM - | fact - | JOIN dim ON (fact.K = dim.K) - | JOIN dim1 ON (fact.K = dim1.K AND dim1.G > 0) - |WHERE fact.C > dim.E OR dim.E IS NULL - """.stripMargin.trim, - s""" - |SELECT fact.A - |FROM - | fact - | JOIN dim ON (fact.K = dim.K) - | JOIN dim1 ON (fact.K = dim1.K AND dim1.G > 0) - |WHERE fact.C > dim.E AND dim.E IS NULL - """.stripMargin.trim, - s""" - |SELECT fact.A - |FROM - | fact - | JOIN dim ON (fact.K = dim.K) - | JOIN dim1 ON (fact.K = dim1.K AND dim1.G > 0) - |WHERE fact.C > dim.E - """.stripMargin.trim, - s""" - |SELECT fact.A - |FROM - | fact - | JOIN dim ON (fact.K = dim.K) - | JOIN dim1 ON (fact.K = dim1.K AND dim1.G > 0) - """.stripMargin.trim, - s""" - |SELECT fact.A,fact.B,COUNT(*) AS A - |FROM - | fact - | JOIN dim ON (fact.K = dim.K AND fact.K IS NOT NULL) - | JOIN dim1 ON (fact.K = dim1.K AND dim1.G > 0 AND dim1.K IS NOT NULL) - |WHERE fact.E IS NULL AND fact.C > dim.E - |GROUP BY fact.A,fact.B - """.stripMargin.trim, - s""" - |SELECT fact.A,fact.B,COUNT(*) AS A - |FROM - | fact - | JOIN dim ON (fact.K = dim.K AND fact.K IS NOT NULL) - | JOIN dim1 ON (fact.K = dim1.K AND dim1.G > 0) - |WHERE fact.E IS NULL AND fact.C > dim.E - |GROUP BY fact.A,fact.B - """.stripMargin.trim, - s""" - |SELECT fact.A,fact.B,COUNT(*) AS A - |FROM - | fact - | JOIN dim ON (fact.K = dim.K) - | JOIN dim1 ON (fact.K = dim1.K AND dim1.G > 0) - |WHERE fact.E IS NULL AND fact.C > dim.E - |GROUP BY fact.A,fact.B - """.stripMargin.trim, - s""" - |SELECT fact.A,fact.B,COUNT(*) AS A - |FROM - | fact - | JOIN dim ON (fact.K = dim.K) - | JOIN dim1 ON (fact.K = dim1.K AND dim1.G > 0) - |WHERE fact.C > fact.E AND fact.C > dim.E - |GROUP BY fact.A,fact.B - """.stripMargin.trim, - s""" - |SELECT fact.A,fact.B,COUNT(*) AS A - |FROM - | fact - | JOIN dim ON (fact.K = dim.K) - | JOIN dim1 ON (fact.K = dim1.K) - |WHERE fact.C > fact.E AND (fact.C > dim.E OR dim1.G > 0) - |GROUP BY fact.A,fact.B - """.stripMargin.trim, - s""" - |SELECT fact.A,fact.B,COUNT(*) AS A - |FROM - | fact - | JOIN dim ON (fact.K = dim.K) - | JOIN dim1 ON (fact.K = dim1.K) - |WHERE fact.C > fact.E AND fact.C > dim.E OR dim1.G > 0 - |GROUP BY fact.A,fact.B - """.stripMargin.trim, - s""" - |SELECT fact.A,fact.B,COUNT(*) AS A - |FROM - | fact - | JOIN dim ON (fact.K = dim.K) - | JOIN dim1 ON (fact.K = dim1.K) - |WHERE fact.C > fact.E AND fact.C > dim.E - |GROUP BY fact.A,fact.B - """.stripMargin.trim, - s""" - |SELECT fact.A,fact.B,COUNT(*) AS A - |FROM - | fact - | JOIN dim ON (fact.K = dim.K) - | JOIN dim1 ON (fact.K = dim1.K) - |WHERE fact.C > fact.E OR fact.C > dim.E - |GROUP BY fact.A,fact.B - """.stripMargin.trim, - s""" - |SELECT fact.A,fact.B,COUNT(*) AS A - |FROM - | fact - | JOIN dim ON (fact.K = dim.K) - | JOIN dim1 ON (fact.K = dim1.K) - |WHERE fact.C > fact.E - |GROUP BY fact.A,fact.B - """.stripMargin.trim, - s""" - |SELECT fact.A,fact.B,COUNT(*) AS A - |FROM - | fact - | JOIN dim ON (fact.K = dim.K) - | JOIN dim1 ON (fact.K = dim1.K) - |GROUP BY fact.A,fact.B - """.stripMargin.trim, - s""" - |SELECT fact.A,COUNT(*) AS S1 - |FROM - | fact - | JOIN dim ON (fact.K = dim.K) - | JOIN dim1 ON (fact.K = dim1.K) - |GROUP BY fact.A - |--HAVING COUNT(*) > 5 - """.stripMargin.trim, - s""" - |SELECT fact.A,COUNT(*)--, my_fun(3) AS S1 - |FROM - | fact - | JOIN dim ON (fact.K = dim.K) - | JOIN dim1 ON (fact.K = dim1.K) - |GROUP BY fact.A - """.stripMargin.trim, - s""" - |SELECT fact.A,COUNT(*) AS S1 - |FROM - | fact - | JOIN dim ON (fact.K = dim.K) - | JOIN dim1 ON (fact.K = dim1.K) - |GROUP BY fact.A - """.stripMargin.trim, - s""" - |SELECT fact.A,sum(cast(dim.D as bigint)) AS S1 - |FROM - | fact - | JOIN dim ON (fact.K = dim.K) - | JOIN dim1 ON (fact.K = dim1.K) - |GROUP BY fact.A - """.stripMargin.trim, - s""" - |SELECT FOO.A, sum(cast(FOO.B as bigint)) AS S - |FROM (SELECT fact.A, fact.B - | FROM - | fact - | JOIN dim ON (fact.K = dim.K)) FOO - |GROUP BY FOO.A - """.stripMargin.trim, - s""" - |SELECT FOO.A, sum(cast(FOO.B as bigint)) AS S - |FROM (SELECT fact.A, fact.B - | FROM - | fact - | JOIN dim ON (fact.K = dim.K)) FOO - |GROUP BY FOO.A - """.stripMargin.trim, - s""" - |SELECT f1.A,f1.B,COUNT(*) - |FROM - | fact f1 - | JOIN fact f2 ON (f1.K = f2.K) - | JOIN fact f3 ON (f1.K = f3.K) - |GROUP BY f1.A,f1.B - """.stripMargin.trim, - s""" - |SELECT fact.A,fact.B,sum(cast(dim.D as bigint)) AS S1 - |FROM - | fact - | LEFT OUTER JOIN dim ON (fact.K = dim.K) - |GROUP BY fact.A,fact.B - """.stripMargin.trim, - s""" - |SELECT fact.A,fact.B,fact.C,sum(cast(dim.D as bigint)) AS S1 - |FROM - | fact - | LEFT OUTER JOIN dim ON (fact.K = dim.K) - |GROUP BY fact.A,fact.B,fact.C - """.stripMargin.trim, -// s""" -// |SELECT * -// |FROM fact, dim -// """.stripMargin.trim, - s""" - |SELECT store_sales.ss_store_sk,date_dim.d_year, - | COUNT(*) numsales - |FROM date_dim, store_sales - |WHERE date_dim.d_date_sk = store_sales.ss_sold_date_sk - |GROUP BY store_sales.ss_store_sk,date_dim.d_year GROUPING SETS (store_sales.ss_store_sk,date_dim.d_year) - """.stripMargin.trim, - s""" - |SELECT store_sales.ss_store_sk,date_dim.d_year, - | SUM(store_sales.ss_ext_sales_price) as itemrevenue - |FROM date_dim, store_sales - |WHERE date_dim.d_date_sk = store_sales.ss_sold_date_sk - |GROUP BY CUBE(store_sales.ss_store_sk,date_dim.d_year) - """.stripMargin.trim, - s""" - |SELECT date_dim.d_moy,date_dim.d_qoy, date_dim.d_year, - | SUM(store_sales.ss_ext_sales_price) as itemrevenue - |FROM date_dim, store_sales - |WHERE date_dim.d_date_sk = store_sales.ss_sold_date_sk - |GROUP BY ROLLUP(date_dim.d_moy,date_dim.d_qoy, date_dim.d_year) - """.stripMargin.trim - ) - val testSQLBatch2 = Seq[String]( - s""" - |SELECT f1.A,COUNT(*) AS B - |FROM - | fact f1 - | JOIN dim d1 ON (f1.K = d1.K) - |WHERE f1.E IS NULL AND (f1.C > d1.E OR d1.E = 3) - |GROUP BY f1.A - """.stripMargin.trim, - s""" - |SELECT f1.A,COUNT(*) AS B - |FROM - | fact f1 - | JOIN dim d1 ON (f1.K = d1.K) - | JOIN dim1 d2 ON (f1.K = d2.K AND d2.G > 0) - |WHERE f1.E IS NULL AND f1.C > d1.E - |GROUP BY f1.A - """.stripMargin.trim, - s""" - |SELECT substr(item.i_item_desc,1,30) itemdesc, item.i_item_sk item_sk, date_dim.d_date solddate, count(*) cnt - |FROM date_dim, store_sales, item - |WHERE date_dim.d_date_sk = store_sales.ss_sold_date_sk - | AND store_sales.ss_item_sk = item.i_item_sk - | AND date_dim.d_year in (2000, 2000+1, 2000+2, 2000+3) - |GROUP BY substr(item.i_item_desc,1,30), item.i_item_sk,date_dim.d_date - """.stripMargin.trim, - s""" - |SELECT item.i_item_desc, item.i_category, item.i_class, item.i_current_price, - | SUM(store_sales.ss_ext_sales_price) as itemrevenue, - | SUM(store_sales.ss_ext_sales_price)*100/sum(sum(store_sales.ss_ext_sales_price)) over (partition by item.i_class) as revenueratio - |FROM date_dim, store_sales, item - |WHERE date_dim.d_date_sk = store_sales.ss_sold_date_sk - | AND store_sales.ss_item_sk = item.i_item_sk - | AND item.i_category in ('Sport', 'Books', 'Home') - | AND date_dim.d_date between cast('1999-02-22' as date) AND (cast('1999-02-22' as date) + interval 30 days) - |GROUP BY item.i_item_id, item.i_item_desc, item.i_category, item.i_class, item.i_current_price - """.stripMargin.trim, - s""" - |SELECT 'store' channel, store_sales.ss_store_sk col_name, date_dim.d_year, date_dim.d_qoy, - | item.i_category, SUM(store_sales.ss_ext_sales_price) ext_sales_price - |FROM date_dim, store_sales, item - |WHERE store_sales.ss_store_sk IS NULL - | AND store_sales.ss_sold_date_sk = date_dim.d_date_sk - | AND store_sales.ss_item_sk = item.i_item_sk - |GROUP BY channel, store_sales.ss_store_sk, date_dim.d_year, date_dim.d_qoy, item.i_category - """.stripMargin.trim, - s""" - |SELECT 'store' channel, store_sales.ss_store_sk col_name, date_dim.d_year, date_dim.d_qoy, - | item.i_category, SUM(store_sales.ss_ext_sales_price) ext_sales_price - |FROM date_dim, store_sales, item - |WHERE store_sales.ss_store_sk IS NULL - | AND store_sales.ss_sold_date_sk = date_dim.d_date_sk - | AND store_sales.ss_item_sk = item.i_item_sk - |GROUP BY store_sales.ss_store_sk, date_dim.d_year, date_dim.d_qoy, item.i_category - """.stripMargin.trim, - s""" - |SELECT item.i_brand_id brand_id, item.i_brand brand, SUM(ss_ext_sales_price) ext_price - |FROM date_dim, store_sales, item - |WHERE store_sales.ss_sold_date_sk = date_dim.d_date_sk - | AND store_sales.ss_item_sk = item.i_item_sk - | AND item.i_manager_id = 28 - | AND date_dim.d_year = 1999 - | AND date_dim.d_moy = 11 - |GROUP BY item.i_brand_id, item.i_brand - """.stripMargin.trim, - s""" - |SELECT item.i_brand_id brand_id, item.i_brand_id brand, SUM(ss_ext_sales_price) ext_price - |FROM date_dim, store_sales, item - |WHERE store_sales.ss_sold_date_sk = date_dim.d_date_sk - | AND store_sales.ss_item_sk = item.i_item_sk - | AND item.i_manager_id = 28 - | AND date_dim.d_year = 1999 - | AND date_dim.d_moy = 11 - |GROUP BY item.i_brand_id, item.i_class_id,item.i_category_id - """.stripMargin.trim, - s""" - |SELECT 'store' channel, item.i_brand_id, item.i_class_id, item.i_category_id, - | SUM(store_sales.ss_quantity*store_sales.ss_list_price) sales, count(*) number_sales - |FROM date_dim, store_sales, item - |WHERE date_dim.d_date_sk = store_sales.ss_sold_date_sk - | AND store_sales.ss_item_sk = item.i_item_sk - | AND date_dim.d_year = 1999 + 2 - | AND date_dim.d_moy = 11 - |GROUP BY item.i_brand_id, item.i_class_id,item.i_category_id - """.stripMargin.trim, - s""" - |SELECT substr(item.i_item_desc,1,30) itemdesc, item.i_item_sk item_sk, dt.d_date solddate, count(*) cnt - |FROM date_dim dt, store_sales, item - |WHERE dt.d_date_sk = store_sales.ss_sold_date_sk - | AND store_sales.ss_item_sk = item.i_item_sk - | AND dt.d_year in (2000, 2000+1, 2000+2, 2000+3) - |GROUP BY substr(item.i_item_desc,1,30), item.i_item_sk,dt.d_date - """.stripMargin.trim, - - s""" - |SELECT store_sales.ss_store_sk,date_dim.d_year, - | COUNT(*) numsales - |FROM date_dim, store_sales - |WHERE date_dim.d_date_sk = store_sales.ss_sold_date_sk - |GROUP BY store_sales.ss_store_sk,date_dim.d_year GROUPING SETS (store_sales.ss_store_sk,date_dim.d_year) - """.stripMargin.trim, - s""" - |SELECT store_sales.ss_store_sk,date_dim.d_year, - | SUM(store_sales.ss_ext_sales_price) as itemrevenue - |FROM date_dim, store_sales - |WHERE date_dim.d_date_sk = store_sales.ss_sold_date_sk - |GROUP BY CUBE(store_sales.ss_store_sk,date_dim.d_year) - """.stripMargin.trim, - s""" - |SELECT date_dim.d_moy,date_dim.d_qoy, date_dim.d_year, - | SUM(store_sales.ss_ext_sales_price) as itemrevenue - |FROM date_dim, store_sales - |WHERE date_dim.d_date_sk = store_sales.ss_sold_date_sk - |GROUP BY ROLLUP(date_dim.d_moy,date_dim.d_qoy, date_dim.d_year) - """.stripMargin.trim - ) -} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/carbondata/blob/0ef7e55c/datamap/mv/plan/src/test/scala/org/apache/carbondata/mv/plans/ExtractJoinConditionsSuite.scala ---------------------------------------------------------------------- diff --git a/datamap/mv/plan/src/test/scala/org/apache/carbondata/mv/plans/ExtractJoinConditionsSuite.scala b/datamap/mv/plan/src/test/scala/org/apache/carbondata/mv/plans/ExtractJoinConditionsSuite.scala index 2e91e80..e1a3d9f 100644 --- a/datamap/mv/plan/src/test/scala/org/apache/carbondata/mv/plans/ExtractJoinConditionsSuite.scala +++ b/datamap/mv/plan/src/test/scala/org/apache/carbondata/mv/plans/ExtractJoinConditionsSuite.scala @@ -22,7 +22,7 @@ import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.plans.logical.LocalRelation import org.apache.spark.sql.catalyst.plans.{Inner, _} -import org.apache.carbondata.mv.dsl._ +import org.apache.carbondata.mv.dsl.Plans._ import org.apache.carbondata.mv.testutil.ModularPlanTest class ExtractJoinConditionsSuite extends ModularPlanTest { http://git-wip-us.apache.org/repos/asf/carbondata/blob/0ef7e55c/datamap/mv/plan/src/test/scala/org/apache/carbondata/mv/plans/IsSPJGHSuite.scala ---------------------------------------------------------------------- diff --git a/datamap/mv/plan/src/test/scala/org/apache/carbondata/mv/plans/IsSPJGHSuite.scala b/datamap/mv/plan/src/test/scala/org/apache/carbondata/mv/plans/IsSPJGHSuite.scala index e80a0cb..dbc1756 100644 --- a/datamap/mv/plan/src/test/scala/org/apache/carbondata/mv/plans/IsSPJGHSuite.scala +++ b/datamap/mv/plan/src/test/scala/org/apache/carbondata/mv/plans/IsSPJGHSuite.scala @@ -20,8 +20,7 @@ package org.apache.carbondata.mv.plans import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.plans.logical.LocalRelation - -import org.apache.carbondata.mv.dsl._ +import org.apache.carbondata.mv.dsl.Plans._ import org.apache.carbondata.mv.plans.modular.ModularPlan import org.apache.carbondata.mv.testutil.ModularPlanTest http://git-wip-us.apache.org/repos/asf/carbondata/blob/0ef7e55c/datamap/mv/plan/src/test/scala/org/apache/carbondata/mv/plans/LogicalToModularPlanSuite.scala ---------------------------------------------------------------------- diff --git a/datamap/mv/plan/src/test/scala/org/apache/carbondata/mv/plans/LogicalToModularPlanSuite.scala b/datamap/mv/plan/src/test/scala/org/apache/carbondata/mv/plans/LogicalToModularPlanSuite.scala index 176c5d2..082c325 100644 --- a/datamap/mv/plan/src/test/scala/org/apache/carbondata/mv/plans/LogicalToModularPlanSuite.scala +++ b/datamap/mv/plan/src/test/scala/org/apache/carbondata/mv/plans/LogicalToModularPlanSuite.scala @@ -23,8 +23,7 @@ import org.apache.spark.sql.catalyst.dsl.plans._ import org.apache.spark.sql.catalyst.expressions.aggregate.Count import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.plans.{LeftOuter, RightOuter, _} - -import org.apache.carbondata.mv.dsl._ +import org.apache.carbondata.mv.dsl.Plans._ import org.apache.carbondata.mv.plans.modular.Flags._ import org.apache.carbondata.mv.plans.modular.{JoinEdge, ModularRelation} import org.apache.carbondata.mv.testutil.ModularPlanTest http://git-wip-us.apache.org/repos/asf/carbondata/blob/0ef7e55c/datamap/mv/plan/src/test/scala/org/apache/carbondata/mv/plans/ModularToSQLSuite.scala ---------------------------------------------------------------------- diff --git a/datamap/mv/plan/src/test/scala/org/apache/carbondata/mv/plans/ModularToSQLSuite.scala b/datamap/mv/plan/src/test/scala/org/apache/carbondata/mv/plans/ModularToSQLSuite.scala index 7cd3d73..c74491c 100644 --- a/datamap/mv/plan/src/test/scala/org/apache/carbondata/mv/plans/ModularToSQLSuite.scala +++ b/datamap/mv/plan/src/test/scala/org/apache/carbondata/mv/plans/ModularToSQLSuite.scala @@ -17,149 +17,133 @@ package org.apache.carbondata.mv.plans -import org.apache.spark.sql.SparkSession -import org.scalatest.{BeforeAndAfter, BeforeAndAfterAll} +import org.apache.spark.sql.hive.CarbonSessionCatalog +import org.scalatest.BeforeAndAfter -import org.apache.carbondata.mv.dsl._ +import org.apache.carbondata.mv.dsl.Plans._ import org.apache.carbondata.mv.testutil.ModularPlanTest -class ModularToSQLSuite extends ModularPlanTest with BeforeAndAfterAll { - import org.apache.carbondata.mv.TestSQLBatch._ +class ModularToSQLSuite extends ModularPlanTest with BeforeAndAfter { - override protected def beforeAll(): Unit = { - drop + import org.apache.carbondata.mv.testutil.TestSQLBatch._ - sql( - s""" - |CREATE TABLE Fact ( - | `A` int, - | `B` int, - | `C` int, - | `E` int, - | `K` int - |) - |ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' - |STORED AS TEXTFILE + val spark = sqlContext + val testHive = sqlContext.sparkSession + val hiveClient = spark.sparkSession.sessionState.catalog.asInstanceOf[CarbonSessionCatalog].getClient() + + ignore("convert modular plans to sqls") { + + hiveClient.runSqlHive( + s""" + |CREATE TABLE if not exists Fact ( + | `A` int, + | `B` int, + | `C` int, + | `E` int, + | `K` int + |) + |ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' + |STORED AS TEXTFILE """.stripMargin.trim - ) - - sql( - s""" - |CREATE TABLE Dim ( - | `D` int, - | `E` int, - | `K` int - |) - |ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' - |STORED AS TEXTFILE + ) + + hiveClient.runSqlHive( + s""" + |CREATE TABLE if not exists Dim ( + | `D` int, + | `E` int, + | `K` int + |) + |ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' + |STORED AS TEXTFILE """.stripMargin.trim - ) - - sql( - s""" - |CREATE TABLE Dim1 ( - | `F` int, - | `G` int, - | `K` int - |) - |ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' - |STORED AS TEXTFILE + ) + + hiveClient.runSqlHive( + s""" + |CREATE TABLE if not exists Dim1 ( + | `F` int, + | `G` int, + | `K` int + |) + |ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' + |STORED AS TEXTFILE """.stripMargin.trim - ) - - sql( - s""" - |CREATE TABLE store_sales ( - | `ss_sold_date_sk` int, - | `ss_item_sk` int, - | `ss_quantity` int, - | `ss_list_price` decimal(7,2), - | `ss_ext_sales_price` decimal(7,2), - | `ss_store_sk` int - |) - |ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' - |STORED AS TEXTFILE + ) + + hiveClient.runSqlHive( + s""" + |CREATE TABLE if not exists store_sales ( + | `ss_sold_date_sk` int, + | `ss_item_sk` int, + | `ss_quantity` int, + | `ss_list_price` decimal(7,2), + | `ss_ext_sales_price` decimal(7,2), + | `ss_store_sk` int + |) + |ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' + |STORED AS TEXTFILE """.stripMargin.trim ) - - sql( - s""" - |CREATE TABLE date_dim ( - | `d_date_sk` int, - | `d_date` date, - | `d_year` int, - | `d_moy` int, - | `d_qoy` int - |) - |ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' - |STORED AS TEXTFILE + + hiveClient.runSqlHive( + s""" + |CREATE TABLE if not exists date_dim ( + | `d_date_sk` int, + | `d_date` date, + | `d_year` int, + | `d_moy` int, + | `d_qoy` int + |) + |ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' + |STORED AS TEXTFILE """.stripMargin.trim ) - - sql( - s""" - |CREATE TABLE item ( - | `i_item_sk` int, - | `i_item_id` string, - | `i_brand` string, - | `i_brand_id` int, - | `i_item_desc` string, - | `i_class_id` int, - | `i_class` string, - | `i_category` string, - | `i_category_id` int, - | `i_manager_id` int, - | `i_current_price` decimal(7,2) - |) - |ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' - |STORED AS TEXTFILE + + hiveClient.runSqlHive( + s""" + |CREATE TABLE if not exists item ( + | `i_item_sk` int, + | `i_item_id` string, + | `i_brand` string, + | `i_brand_id` int, + | `i_item_desc` string, + | `i_class_id` int, + | `i_class` string, + | `i_category` string, + | `i_category_id` int, + | `i_manager_id` int, + | `i_current_price` decimal(7,2) + |) + |ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' + |STORED AS TEXTFILE """.stripMargin.trim ) - - sqlContext.udf.register("my_fun", (s: Integer) => s) - } - - - private def drop = { - sql(s"drop table if exists Fact") - sql(s"drop table if exists Dim") - sql(s"drop table if exists Dim1") - sql(s"drop table if exists store_sales") - sql(s"drop table if exists date_dim") - sql(s"drop table if exists item") - } - - ignore("convert modular plans to sqls") { + + testHive.udf.register("my_fun", (s: Integer) => s) + testSQLBatch.foreach { query => - testPlan(query) - } - } - - private def testPlan(query: String) = { - val analyzed = sql(query).queryExecution.analyzed - val optimized = analyzed.optimize - val modularPlan = analyzed.optimize.modularize + val analyzed = testHive.sql(query).queryExecution.analyzed + val optimized = analyzed.optimize + val modularPlan = analyzed.optimize.modularize - println(s"\n\n===== ACTUAL QUERY =====\n\n${ query } \n") + LOGGER.info(s"\n\n===== MODULAR PLAN =====\n\n${modularPlan.treeString} \n") + + val compactSql = modularPlan.asCompactSQL + val convertedSql = modularPlan.asOneLineSQL - println(s"\n\n===== MODULAR PLAN =====\n\n${ modularPlan.treeString } \n") + LOGGER.info(s"\n\n===== CONVERTED SQL =====\n\n$compactSql \n") + + val analyzed1 = testHive.sql(convertedSql).queryExecution.analyzed + val modularPlan1 = analyzed1.optimize.modularize - val compactSql = modularPlan.asCompactSQL - val convertedSql = modularPlan.asOneLineSQL + LOGGER.info(s"\n\n===== CONVERTED SQL =====\n\n$compactSql \n") - println(s"\n\n===== CONVERTED SQL =====\n\n$compactSql \n") - - val analyzed1 = sql(convertedSql).queryExecution.analyzed - val modularPlan1 = analyzed1.optimize.modularize - - println(s"\n\n===== CONVERTED SQL =====\n\n$compactSql \n") - - println(s"\n\n===== MODULAR PLAN1 =====\n\n${ modularPlan1.treeString } \n") - - comparePlans(modularPlan, modularPlan1) - } + LOGGER.info(s"\n\n===== MODULAR PLAN1 =====\n\n${modularPlan1.treeString} \n") + + comparePlans(modularPlan, modularPlan1) + } - override protected def afterAll(): Unit = { - drop } + } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/carbondata/blob/0ef7e55c/datamap/mv/plan/src/test/scala/org/apache/carbondata/mv/plans/SignatureSuite.scala ---------------------------------------------------------------------- diff --git a/datamap/mv/plan/src/test/scala/org/apache/carbondata/mv/plans/SignatureSuite.scala b/datamap/mv/plan/src/test/scala/org/apache/carbondata/mv/plans/SignatureSuite.scala index c64826f..631eca2 100644 --- a/datamap/mv/plan/src/test/scala/org/apache/carbondata/mv/plans/SignatureSuite.scala +++ b/datamap/mv/plan/src/test/scala/org/apache/carbondata/mv/plans/SignatureSuite.scala @@ -18,87 +18,60 @@ package org.apache.carbondata.mv.plans import org.apache.spark.sql.catalyst.util._ +import org.apache.spark.sql.hive.CarbonSessionCatalog import org.scalatest.BeforeAndAfterAll -import org.apache.carbondata.mv.dsl._ +import org.apache.carbondata.mv.dsl.Plans._ import org.apache.carbondata.mv.plans.modular.ModularPlanSignatureGenerator import org.apache.carbondata.mv.testutil.ModularPlanTest -import org.apache.carbondata.mv.testutil.Tpcds_1_4_Tables.tpcds1_4Tables class SignatureSuite extends ModularPlanTest with BeforeAndAfterAll { - import org.apache.carbondata.mv.TestSQLBatch._ + import org.apache.carbondata.mv.testutil.TestSQLBatch._ - override protected def beforeAll(): Unit = { - sql("drop database if exists tpcds1 cascade") - sql("create database tpcds1") - sql("use tpcds1") - tpcds1_4Tables.foreach { create_table => - sql(create_table) - } + val spark = sqlContext + val testHive = sqlContext.sparkSession + val hiveClient = spark.sparkSession.sessionState.catalog.asInstanceOf[CarbonSessionCatalog].getClient() + + ignore("test signature computing") { - sql( - s""" - |CREATE TABLE Fact ( - | `A` int, - | `B` int, - | `C` int, - | `E` int, - | `K` int - |) - |ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' - |STORED AS TEXTFILE + hiveClient.runSqlHive( + s""" + |CREATE TABLE if not exists Fact ( + | `A` int, + | `B` int, + | `C` int, + | `K` int + |) + |ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' + |STORED AS TEXTFILE """.stripMargin.trim - ) - - sql( - s""" - |CREATE TABLE Dim ( - | `D` int, - | `E` int, - | `K` int - |) - |ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' - |STORED AS TEXTFILE + ) + + hiveClient.runSqlHive( + s""" + |CREATE TABLE if not exists Dim ( + | `D` int, + | `K` int + |) + |ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' + |STORED AS TEXTFILE """.stripMargin.trim - ) - - sql( - s""" - |CREATE TABLE Dim1 ( - | `F` int, - | `G` int, - | `K` int - |) - |ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' - |STORED AS TEXTFILE - """.stripMargin.trim - ) - - sqlContext.udf.register("my_fun", (s: Integer) => s) - } - - - test("test signature computing") { - + ) + testSQLBatch.foreach { query => - val analyzed = sql(query).queryExecution.analyzed + val analyzed = testHive.sql(query).queryExecution.analyzed val modularPlan = analyzed.optimize.modularize val sig = ModularPlanSignatureGenerator.generate(modularPlan) sig match { case Some(s) if (s.groupby != true || s.datasets != Set("default.fact","default.dim")) => - println( + fail( s""" |=== FAIL: signature do not match === |${sideBySide(s.groupby.toString, true.toString).mkString("\n")} |${sideBySide(s.datasets.toString, Set("Fact","Dim").toString).mkString("\n")} """.stripMargin) case _ => - } + } } - } - - override protected def afterAll(): Unit = { - sql("use default") - sql("drop database if exists tpcds1 cascade") - } + } } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/carbondata/blob/0ef7e55c/datamap/mv/plan/src/test/scala/org/apache/carbondata/mv/plans/Tpcds_1_4_BenchmarkSuite.scala ---------------------------------------------------------------------- diff --git a/datamap/mv/plan/src/test/scala/org/apache/carbondata/mv/plans/Tpcds_1_4_BenchmarkSuite.scala b/datamap/mv/plan/src/test/scala/org/apache/carbondata/mv/plans/Tpcds_1_4_BenchmarkSuite.scala new file mode 100644 index 0000000..f8441f9 --- /dev/null +++ b/datamap/mv/plan/src/test/scala/org/apache/carbondata/mv/plans/Tpcds_1_4_BenchmarkSuite.scala @@ -0,0 +1,86 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.carbondata.mv.plans + +import scala.util.{Failure, Success, Try} +import org.apache.spark.sql.SparkSession +import org.scalatest.BeforeAndAfter +import org.apache.carbondata.mv.dsl._ +import org.apache.carbondata.mv.testutil.ModularPlanTest + +// scalastyle:off println +class Tpcds_1_4_BenchmarkSuite extends ModularPlanTest with BeforeAndAfter { + import org.apache.carbondata.mv.testutil.Tpcds_1_4_QueryBatch._ + import org.apache.carbondata.mv.testutil.Tpcds_1_4_Tables._ + +// val spark = SparkSession.builder().master("local").enableHiveSupport().getOrCreate() +// // spark.conf.set("spark.sql.crossJoin.enabled", true) +// val testHive = new org.apache.spark.sql.hive.test.TestHiveContext(spark.sparkContext, false) +// val hiveClient = testHive.sparkSession.metadataHive + +// test("test SQLBuilder using tpc-ds queries") { +// +// tpcds1_4Tables.foreach { create_table => +// hiveClient.runSqlHive(create_table) +// } +// +//// val dest = "qTradeflow" // this line is for development, comment it out once done +// val dest = "qSEQ" +//// val dest = "qAggPushDown" // this line is for development, comment it out once done +//// val dest = "q10" +// +// tpcds1_4Queries.foreach { query => +// if (query._1 == dest) { // this line is for development, comment it out once done +// val analyzed = testHive.sql(query._2).queryExecution.analyzed +// println(s"""\n\n===== Analyzed Logical Plan for ${query._1} =====\n\n$analyzed \n""") +// +//// val cnonicalizedPlan = new SQLBuilder(analyzed).Canonicalizer.execute(analyzed) +//// +//// Try(new SQLBuilder(analyzed).toSQL) match { +//// case Success(s) => logInfo(s"""\n\n===== CONVERTED back ${query._1} USING SQLBuilder =====\n\n$s \n""") +//// case Failure(e) => logInfo(s"""Cannot convert the logical query plan of ${query._1} back to SQL""") +//// } +// +// // this Try is for development, comment it out once done +// Try(analyzed.optimize) match { +// case Success(o) => { +// println(s"""\n\n===== Optimized Logical Plan for ${query._1} =====\n\n$o \n""") +// } +// case Failure(e) => +// } +// +// val o = analyzed.optimize +// val o1 = o.modularize +// +// Try(o.modularize.harmonize) match { +// case Success(m) => { +// println(s"""\n\n===== MODULAR PLAN for ${query._1} =====\n\n$m \n""") +// +// Try(m.asCompactSQL) match { +// case Success(s) => println(s"\n\n===== CONVERTED SQL for ${query._1} =====\n\n${s}\n") +// case Failure(e) => println(s"""\n\n===== CONVERTED SQL for ${query._1} failed =====\n\n${e.toString}""") +// } +// } +// case Failure(e) => println(s"""\n\n==== MODULARIZE the logical query plan for ${query._1} failed =====\n\n${e.toString}""") +// } +// } +// } +// +// } +} +// scalastyle:on println \ No newline at end of file http://git-wip-us.apache.org/repos/asf/carbondata/blob/0ef7e55c/datamap/mv/plan/src/test/scala/org/apache/carbondata/mv/testutil/TestSQLBatch.scala ---------------------------------------------------------------------- diff --git a/datamap/mv/plan/src/test/scala/org/apache/carbondata/mv/testutil/TestSQLBatch.scala b/datamap/mv/plan/src/test/scala/org/apache/carbondata/mv/testutil/TestSQLBatch.scala new file mode 100644 index 0000000..bb90340 --- /dev/null +++ b/datamap/mv/plan/src/test/scala/org/apache/carbondata/mv/testutil/TestSQLBatch.scala @@ -0,0 +1,584 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.carbondata.mv.testutil + +object TestSQLBatch { + + val testSQLBatch = Seq[String]( + s""" + |SELECT f1.A,COUNT(*) AS B + |FROM + | fact f1 + | JOIN dim d1 ON (f1.K = d1.K) + |WHERE f1.E IS NULL AND (f1.C > d1.E OR d1.E = 3) + |GROUP BY f1.A + """.stripMargin.trim, + s""" + |SELECT f1.A,COUNT(*) AS B + |FROM + | fact f1 + | JOIN dim d1 ON (f1.K = d1.K) + | JOIN dim1 d2 ON (f1.K = d2.K AND d2.G > 0) + |WHERE f1.E IS NULL AND f1.C > d1.E + |GROUP BY f1.A + """.stripMargin.trim, + s""" + |SELECT substr(item.i_item_desc,1,30) itemdesc, item.i_item_sk item_sk, date_dim.d_date solddate, count(*) cnt + |FROM date_dim, store_sales, item + |WHERE date_dim.d_date_sk = store_sales.ss_sold_date_sk + | AND store_sales.ss_item_sk = item.i_item_sk + | AND date_dim.d_year in (2000, 2000+1, 2000+2, 2000+3) + |GROUP BY substr(item.i_item_desc,1,30), item.i_item_sk,date_dim.d_date + """.stripMargin.trim, + s""" + |SELECT item.i_item_desc, item.i_category, item.i_class, item.i_current_price, + | SUM(store_sales.ss_ext_sales_price) as itemrevenue, + | SUM(store_sales.ss_ext_sales_price)*100/sum(sum(store_sales.ss_ext_sales_price)) over (partition by item.i_class) as revenueratio + |FROM date_dim, store_sales, item + |WHERE date_dim.d_date_sk = store_sales.ss_sold_date_sk + | AND store_sales.ss_item_sk = item.i_item_sk + | AND item.i_category in ('Sport', 'Books', 'Home') + | AND date_dim.d_date between cast('1999-02-22' as date) AND (cast('1999-02-22' as date) + interval 30 days) + |GROUP BY item.i_item_id, item.i_item_desc, item.i_category, item.i_class, item.i_current_price + """.stripMargin.trim, + s""" + |SELECT 'store' channel, store_sales.ss_store_sk col_name, date_dim.d_year, date_dim.d_qoy, + | item.i_category, SUM(store_sales.ss_ext_sales_price) ext_sales_price + |FROM date_dim, store_sales, item + |WHERE store_sales.ss_store_sk IS NULL + | AND store_sales.ss_sold_date_sk = date_dim.d_date_sk + | AND store_sales.ss_item_sk = item.i_item_sk + |GROUP BY channel, store_sales.ss_store_sk, date_dim.d_year, date_dim.d_qoy, item.i_category + """.stripMargin.trim, + s""" + |SELECT 'store' channel, store_sales.ss_store_sk col_name, date_dim.d_year, date_dim.d_qoy, + | item.i_category, SUM(store_sales.ss_ext_sales_price) ext_sales_price + |FROM date_dim, store_sales, item + |WHERE store_sales.ss_store_sk IS NULL + | AND store_sales.ss_sold_date_sk = date_dim.d_date_sk + | AND store_sales.ss_item_sk = item.i_item_sk + |GROUP BY store_sales.ss_store_sk, date_dim.d_year, date_dim.d_qoy, item.i_category + """.stripMargin.trim, + s""" + |SELECT item.i_brand_id brand_id, item.i_brand brand, SUM(ss_ext_sales_price) ext_price + |FROM date_dim, store_sales, item + |WHERE store_sales.ss_sold_date_sk = date_dim.d_date_sk + | AND store_sales.ss_item_sk = item.i_item_sk + | AND item.i_manager_id = 28 + | AND date_dim.d_year = 1999 + | AND date_dim.d_moy = 11 + |GROUP BY item.i_brand_id, item.i_brand + """.stripMargin.trim, + s""" + |SELECT item.i_brand_id brand_id, item.i_brand_id brand, SUM(ss_ext_sales_price) ext_price + |FROM date_dim, store_sales, item + |WHERE store_sales.ss_sold_date_sk = date_dim.d_date_sk + | AND store_sales.ss_item_sk = item.i_item_sk + | AND item.i_manager_id = 28 + | AND date_dim.d_year = 1999 + | AND date_dim.d_moy = 11 + |GROUP BY item.i_brand_id, item.i_class_id,item.i_category_id + """.stripMargin.trim, + s""" + |SELECT 'store' channel, item.i_brand_id, item.i_class_id, item.i_category_id, + | SUM(store_sales.ss_quantity*store_sales.ss_list_price) sales, count(*) number_sales + |FROM date_dim, store_sales, item + |WHERE date_dim.d_date_sk = store_sales.ss_sold_date_sk + | AND store_sales.ss_item_sk = item.i_item_sk + | AND date_dim.d_year = 1999 + 2 + | AND date_dim.d_moy = 11 + |GROUP BY item.i_brand_id, item.i_class_id,item.i_category_id + """.stripMargin.trim, + s""" + |SELECT substr(item.i_item_desc,1,30) itemdesc, item.i_item_sk item_sk, dt.d_date solddate, count(*) cnt + |FROM date_dim dt, store_sales, item + |WHERE dt.d_date_sk = store_sales.ss_sold_date_sk + | AND store_sales.ss_item_sk = item.i_item_sk + | AND dt.d_year in (2000, 2000+1, 2000+2, 2000+3) + |GROUP BY substr(item.i_item_desc,1,30), item.i_item_sk,dt.d_date + """.stripMargin.trim, + s""" + |SELECT fact.B + |FROM + | fact + |UNION ALL + |SELECT fact.B + |FROM + | fact + |UNION ALL + |SELECT fact.B + |FROM + | fact + """.stripMargin.trim, + s""" + |SELECT fact.A + |FROM + | fact + | JOIN dim ON (fact.K = dim.K) + | JOIN dim1 ON (fact.K = dim1.K AND dim1.G > 0) + |WHERE fact.C > dim.E AND dim.E IS NULL + |UNION ALL + |SELECT fact.A + |FROM + | fact + | JOIN dim ON (fact.K = dim.K) + | JOIN dim1 ON (fact.K = dim1.K AND dim1.G > 0) + |WHERE fact.C > dim.E AND dim.E IS NULL + |UNION ALL + |SELECT fact.B + |FROM + | fact + """.stripMargin.trim, + s""" + |SELECT fact.A + |FROM + | fact + | JOIN dim ON (fact.K = dim.K) + | JOIN dim1 ON (fact.K = dim1.K AND dim1.G > 0) + |WHERE fact.C > dim.E AND dim.E IS NULL + |UNION ALL + |SELECT fact.A + |FROM + | fact + | JOIN dim ON (fact.K = dim.K) + | JOIN dim1 ON (fact.K = dim1.K AND dim1.G > 0) + |WHERE fact.C > dim.E AND dim.E IS NULL + """.stripMargin.trim, + s""" + |SELECT fact.A + |FROM + | fact + | JOIN dim ON (fact.K = dim.K) + | JOIN dim1 ON (fact.K = dim1.K AND dim1.G > 0) + |WHERE fact.C > dim.E AND dim.E IS NULL + |UNION ALL + |SELECT fact.B + |FROM + | fact + """.stripMargin.trim, + s""" + |SELECT fact.A + |FROM + | fact + | JOIN dim ON (fact.K = dim.K) + | JOIN dim1 ON (fact.K = dim1.K AND dim1.G > 0) + |WHERE fact.C > dim.E AND dim.E IS NULL + |UNION ALL + |SELECT fact.A + |FROM + | fact + """.stripMargin.trim, + s""" + |SELECT f1.A,f1.B,COUNT(*) AS A + |FROM + | fact f1 + | JOIN dim d1 ON (f1.K = d1.K) + |GROUP BY f1.A,f1.B + """.stripMargin.trim, + s""" + |SELECT f1.A,f1.B,COUNT(*) AS A + |FROM + | fact f1 + | JOIN dim d1 ON (f1.K = d1.K) + |WHERE f1.E IS NULL AND f1.C > d1.E AND f1.B = 2 + |GROUP BY f1.A,f1.B + """.stripMargin.trim, + s""" + |SELECT f1.A,f1.B,COUNT(*) AS A + |FROM + | fact f1 + | JOIN dim d1 ON (f1.K = d1.K) + |WHERE f1.E IS NULL AND f1.C > d1.E AND d1.E = 3 + |GROUP BY f1.A,f1.B + """.stripMargin.trim, + s""" + |SELECT f1.A,f1.B,COUNT(*) AS A + |FROM + | fact f1 + | JOIN dim d1 ON (f1.K = d1.K) + |WHERE f1.E IS NULL AND f1.C > d1.E + |GROUP BY f1.A,f1.B + """.stripMargin.trim, + s""" + |SELECT f1.A,f1.B,COUNT(*) AS A + |FROM + | fact f1 + | JOIN dim d1 ON (f1.K = d1.K) + | JOIN dim d2 ON (f1.K = d2.K AND d2.E > 0) + |WHERE f1.E IS NULL AND f1.C > d1.E + |GROUP BY f1.A,f1.B + """.stripMargin.trim, + s""" + |SELECT fact.A,fact.B,COUNT(*) AS A + |FROM + | fact + | JOIN dim d1 ON (fact.K = d1.K) + | JOIN dim d2 ON (fact.K = d2.K AND d2.E > 0) + |WHERE fact.E IS NULL AND fact.C > d1.E + |GROUP BY fact.A,fact.B + """.stripMargin.trim, + s""" + |SELECT fact.A + |FROM + | fact + | JOIN dim ON (fact.K = dim.K) + | JOIN dim1 ON (fact.K = dim1.K) + |WHERE fact.C > dim.E AND (dim.E IS NULL OR dim1.G IS NULL) + """.stripMargin.trim, + s""" + |SELECT fact.A + |FROM + | fact + | JOIN dim ON (fact.K = dim.K) + | JOIN dim1 ON (fact.K = dim1.K) + |WHERE fact.C > dim.E OR dim1.G IS NULL + """.stripMargin.trim, + s""" + |SELECT fact.A + |FROM + | fact + | JOIN dim ON (fact.K = dim.K) + | JOIN dim1 ON (fact.K = dim1.K AND dim1.G > 0) + |WHERE fact.C > dim.E OR dim.E IS NULL + """.stripMargin.trim, + s""" + |SELECT fact.A + |FROM + | fact + | JOIN dim ON (fact.K = dim.K) + | JOIN dim1 ON (fact.K = dim1.K AND dim1.G > 0) + |WHERE fact.C > dim.E AND dim.E IS NULL + """.stripMargin.trim, + s""" + |SELECT fact.A + |FROM + | fact + | JOIN dim ON (fact.K = dim.K) + | JOIN dim1 ON (fact.K = dim1.K AND dim1.G > 0) + |WHERE fact.C > dim.E + """.stripMargin.trim, + s""" + |SELECT fact.A + |FROM + | fact + | JOIN dim ON (fact.K = dim.K) + | JOIN dim1 ON (fact.K = dim1.K AND dim1.G > 0) + """.stripMargin.trim, + s""" + |SELECT fact.A,fact.B,COUNT(*) AS A + |FROM + | fact + | JOIN dim ON (fact.K = dim.K AND fact.K IS NOT NULL) + | JOIN dim1 ON (fact.K = dim1.K AND dim1.G > 0 AND dim1.K IS NOT NULL) + |WHERE fact.E IS NULL AND fact.C > dim.E + |GROUP BY fact.A,fact.B + """.stripMargin.trim, + s""" + |SELECT fact.A,fact.B,COUNT(*) AS A + |FROM + | fact + | JOIN dim ON (fact.K = dim.K AND fact.K IS NOT NULL) + | JOIN dim1 ON (fact.K = dim1.K AND dim1.G > 0) + |WHERE fact.E IS NULL AND fact.C > dim.E + |GROUP BY fact.A,fact.B + """.stripMargin.trim, + s""" + |SELECT fact.A,fact.B,COUNT(*) AS A + |FROM + | fact + | JOIN dim ON (fact.K = dim.K) + | JOIN dim1 ON (fact.K = dim1.K AND dim1.G > 0) + |WHERE fact.E IS NULL AND fact.C > dim.E + |GROUP BY fact.A,fact.B + """.stripMargin.trim, + s""" + |SELECT fact.A,fact.B,COUNT(*) AS A + |FROM + | fact + | JOIN dim ON (fact.K = dim.K) + | JOIN dim1 ON (fact.K = dim1.K AND dim1.G > 0) + |WHERE fact.C > fact.E AND fact.C > dim.E + |GROUP BY fact.A,fact.B + """.stripMargin.trim, + s""" + |SELECT fact.A,fact.B,COUNT(*) AS A + |FROM + | fact + | JOIN dim ON (fact.K = dim.K) + | JOIN dim1 ON (fact.K = dim1.K) + |WHERE fact.C > fact.E AND (fact.C > dim.E OR dim1.G > 0) + |GROUP BY fact.A,fact.B + """.stripMargin.trim, + s""" + |SELECT fact.A,fact.B,COUNT(*) AS A + |FROM + | fact + | JOIN dim ON (fact.K = dim.K) + | JOIN dim1 ON (fact.K = dim1.K) + |WHERE fact.C > fact.E AND fact.C > dim.E OR dim1.G > 0 + |GROUP BY fact.A,fact.B + """.stripMargin.trim, + s""" + |SELECT fact.A,fact.B,COUNT(*) AS A + |FROM + | fact + | JOIN dim ON (fact.K = dim.K) + | JOIN dim1 ON (fact.K = dim1.K) + |WHERE fact.C > fact.E AND fact.C > dim.E + |GROUP BY fact.A,fact.B + """.stripMargin.trim, + s""" + |SELECT fact.A,fact.B,COUNT(*) AS A + |FROM + | fact + | JOIN dim ON (fact.K = dim.K) + | JOIN dim1 ON (fact.K = dim1.K) + |WHERE fact.C > fact.E OR fact.C > dim.E + |GROUP BY fact.A,fact.B + """.stripMargin.trim, + s""" + |SELECT fact.A,fact.B,COUNT(*) AS A + |FROM + | fact + | JOIN dim ON (fact.K = dim.K) + | JOIN dim1 ON (fact.K = dim1.K) + |WHERE fact.C > fact.E + |GROUP BY fact.A,fact.B + """.stripMargin.trim, + s""" + |SELECT fact.A,fact.B,COUNT(*) AS A + |FROM + | fact + | JOIN dim ON (fact.K = dim.K) + | JOIN dim1 ON (fact.K = dim1.K) + |GROUP BY fact.A,fact.B + """.stripMargin.trim, + s""" + |SELECT fact.A,COUNT(*) AS S1 + |FROM + | fact + | JOIN dim ON (fact.K = dim.K) + | JOIN dim1 ON (fact.K = dim1.K) + |GROUP BY fact.A + |--HAVING COUNT(*) > 5 + """.stripMargin.trim, + s""" + |SELECT fact.A,COUNT(*)--, my_fun(3) AS S1 + |FROM + | fact + | JOIN dim ON (fact.K = dim.K) + | JOIN dim1 ON (fact.K = dim1.K) + |GROUP BY fact.A + """.stripMargin.trim, + s""" + |SELECT fact.A,COUNT(*) AS S1 + |FROM + | fact + | JOIN dim ON (fact.K = dim.K) + | JOIN dim1 ON (fact.K = dim1.K) + |GROUP BY fact.A + """.stripMargin.trim, + s""" + |SELECT fact.A,sum(cast(dim.D as bigint)) AS S1 + |FROM + | fact + | JOIN dim ON (fact.K = dim.K) + | JOIN dim1 ON (fact.K = dim1.K) + |GROUP BY fact.A + """.stripMargin.trim, + s""" + |SELECT FOO.A, sum(cast(FOO.B as bigint)) AS S + |FROM (SELECT fact.A, fact.B + | FROM + | fact + | JOIN dim ON (fact.K = dim.K)) FOO + |GROUP BY FOO.A + """.stripMargin.trim, + s""" + |SELECT FOO.A, sum(cast(FOO.B as bigint)) AS S + |FROM (SELECT fact.A, fact.B + | FROM + | fact + | JOIN dim ON (fact.K = dim.K)) FOO + |GROUP BY FOO.A + """.stripMargin.trim, + s""" + |SELECT f1.A,f1.B,COUNT(*) + |FROM + | fact f1 + | JOIN fact f2 ON (f1.K = f2.K) + | JOIN fact f3 ON (f1.K = f3.K) + |GROUP BY f1.A,f1.B + """.stripMargin.trim, + s""" + |SELECT fact.A,fact.B,sum(cast(dim.D as bigint)) AS S1 + |FROM + | fact + | LEFT OUTER JOIN dim ON (fact.K = dim.K) + |GROUP BY fact.A,fact.B + """.stripMargin.trim, + s""" + |SELECT fact.A,fact.B,fact.C,sum(cast(dim.D as bigint)) AS S1 + |FROM + | fact + | LEFT OUTER JOIN dim ON (fact.K = dim.K) + |GROUP BY fact.A,fact.B,fact.C + """.stripMargin.trim, +// s""" +// |SELECT * +// |FROM fact, dim +// """.stripMargin.trim, + s""" + |SELECT store_sales.ss_store_sk,date_dim.d_year, + | COUNT(*) numsales + |FROM date_dim, store_sales + |WHERE date_dim.d_date_sk = store_sales.ss_sold_date_sk + |GROUP BY store_sales.ss_store_sk,date_dim.d_year GROUPING SETS (store_sales.ss_store_sk,date_dim.d_year) + """.stripMargin.trim, + s""" + |SELECT store_sales.ss_store_sk,date_dim.d_year, + | SUM(store_sales.ss_ext_sales_price) as itemrevenue + |FROM date_dim, store_sales + |WHERE date_dim.d_date_sk = store_sales.ss_sold_date_sk + |GROUP BY CUBE(store_sales.ss_store_sk,date_dim.d_year) + """.stripMargin.trim, + s""" + |SELECT date_dim.d_moy,date_dim.d_qoy, date_dim.d_year, + | SUM(store_sales.ss_ext_sales_price) as itemrevenue + |FROM date_dim, store_sales + |WHERE date_dim.d_date_sk = store_sales.ss_sold_date_sk + |GROUP BY ROLLUP(date_dim.d_moy,date_dim.d_qoy, date_dim.d_year) + """.stripMargin.trim + ) + val testSQLBatch2 = Seq[String]( + s""" + |SELECT f1.A,COUNT(*) AS B + |FROM + | fact f1 + | JOIN dim d1 ON (f1.K = d1.K) + |WHERE f1.E IS NULL AND (f1.C > d1.E OR d1.E = 3) + |GROUP BY f1.A + """.stripMargin.trim, + s""" + |SELECT f1.A,COUNT(*) AS B + |FROM + | fact f1 + | JOIN dim d1 ON (f1.K = d1.K) + | JOIN dim1 d2 ON (f1.K = d2.K AND d2.G > 0) + |WHERE f1.E IS NULL AND f1.C > d1.E + |GROUP BY f1.A + """.stripMargin.trim, + s""" + |SELECT substr(item.i_item_desc,1,30) itemdesc, item.i_item_sk item_sk, date_dim.d_date solddate, count(*) cnt + |FROM date_dim, store_sales, item + |WHERE date_dim.d_date_sk = store_sales.ss_sold_date_sk + | AND store_sales.ss_item_sk = item.i_item_sk + | AND date_dim.d_year in (2000, 2000+1, 2000+2, 2000+3) + |GROUP BY substr(item.i_item_desc,1,30), item.i_item_sk,date_dim.d_date + """.stripMargin.trim, + s""" + |SELECT item.i_item_desc, item.i_category, item.i_class, item.i_current_price, + | SUM(store_sales.ss_ext_sales_price) as itemrevenue, + | SUM(store_sales.ss_ext_sales_price)*100/sum(sum(store_sales.ss_ext_sales_price)) over (partition by item.i_class) as revenueratio + |FROM date_dim, store_sales, item + |WHERE date_dim.d_date_sk = store_sales.ss_sold_date_sk + | AND store_sales.ss_item_sk = item.i_item_sk + | AND item.i_category in ('Sport', 'Books', 'Home') + | AND date_dim.d_date between cast('1999-02-22' as date) AND (cast('1999-02-22' as date) + interval 30 days) + |GROUP BY item.i_item_id, item.i_item_desc, item.i_category, item.i_class, item.i_current_price + """.stripMargin.trim, + s""" + |SELECT 'store' channel, store_sales.ss_store_sk col_name, date_dim.d_year, date_dim.d_qoy, + | item.i_category, SUM(store_sales.ss_ext_sales_price) ext_sales_price + |FROM date_dim, store_sales, item + |WHERE store_sales.ss_store_sk IS NULL + | AND store_sales.ss_sold_date_sk = date_dim.d_date_sk + | AND store_sales.ss_item_sk = item.i_item_sk + |GROUP BY channel, store_sales.ss_store_sk, date_dim.d_year, date_dim.d_qoy, item.i_category + """.stripMargin.trim, + s""" + |SELECT 'store' channel, store_sales.ss_store_sk col_name, date_dim.d_year, date_dim.d_qoy, + | item.i_category, SUM(store_sales.ss_ext_sales_price) ext_sales_price + |FROM date_dim, store_sales, item + |WHERE store_sales.ss_store_sk IS NULL + | AND store_sales.ss_sold_date_sk = date_dim.d_date_sk + | AND store_sales.ss_item_sk = item.i_item_sk + |GROUP BY store_sales.ss_store_sk, date_dim.d_year, date_dim.d_qoy, item.i_category + """.stripMargin.trim, + s""" + |SELECT item.i_brand_id brand_id, item.i_brand brand, SUM(ss_ext_sales_price) ext_price + |FROM date_dim, store_sales, item + |WHERE store_sales.ss_sold_date_sk = date_dim.d_date_sk + | AND store_sales.ss_item_sk = item.i_item_sk + | AND item.i_manager_id = 28 + | AND date_dim.d_year = 1999 + | AND date_dim.d_moy = 11 + |GROUP BY item.i_brand_id, item.i_brand + """.stripMargin.trim, + s""" + |SELECT item.i_brand_id brand_id, item.i_brand_id brand, SUM(ss_ext_sales_price) ext_price + |FROM date_dim, store_sales, item + |WHERE store_sales.ss_sold_date_sk = date_dim.d_date_sk + | AND store_sales.ss_item_sk = item.i_item_sk + | AND item.i_manager_id = 28 + | AND date_dim.d_year = 1999 + | AND date_dim.d_moy = 11 + |GROUP BY item.i_brand_id, item.i_class_id,item.i_category_id + """.stripMargin.trim, + s""" + |SELECT 'store' channel, item.i_brand_id, item.i_class_id, item.i_category_id, + | SUM(store_sales.ss_quantity*store_sales.ss_list_price) sales, count(*) number_sales + |FROM date_dim, store_sales, item + |WHERE date_dim.d_date_sk = store_sales.ss_sold_date_sk + | AND store_sales.ss_item_sk = item.i_item_sk + | AND date_dim.d_year = 1999 + 2 + | AND date_dim.d_moy = 11 + |GROUP BY item.i_brand_id, item.i_class_id,item.i_category_id + """.stripMargin.trim, + s""" + |SELECT substr(item.i_item_desc,1,30) itemdesc, item.i_item_sk item_sk, dt.d_date solddate, count(*) cnt + |FROM date_dim dt, store_sales, item + |WHERE dt.d_date_sk = store_sales.ss_sold_date_sk + | AND store_sales.ss_item_sk = item.i_item_sk + | AND dt.d_year in (2000, 2000+1, 2000+2, 2000+3) + |GROUP BY substr(item.i_item_desc,1,30), item.i_item_sk,dt.d_date + """.stripMargin.trim, + + s""" + |SELECT store_sales.ss_store_sk,date_dim.d_year, + | COUNT(*) numsales + |FROM date_dim, store_sales + |WHERE date_dim.d_date_sk = store_sales.ss_sold_date_sk + |GROUP BY store_sales.ss_store_sk,date_dim.d_year GROUPING SETS (store_sales.ss_store_sk,date_dim.d_year) + """.stripMargin.trim, + s""" + |SELECT store_sales.ss_store_sk,date_dim.d_year, + | SUM(store_sales.ss_ext_sales_price) as itemrevenue + |FROM date_dim, store_sales + |WHERE date_dim.d_date_sk = store_sales.ss_sold_date_sk + |GROUP BY CUBE(store_sales.ss_store_sk,date_dim.d_year) + """.stripMargin.trim, + s""" + |SELECT date_dim.d_moy,date_dim.d_qoy, date_dim.d_year, + | SUM(store_sales.ss_ext_sales_price) as itemrevenue + |FROM date_dim, store_sales + |WHERE date_dim.d_date_sk = store_sales.ss_sold_date_sk + |GROUP BY ROLLUP(date_dim.d_moy,date_dim.d_qoy, date_dim.d_year) + """.stripMargin.trim + ) +} \ No newline at end of file