[ 
https://issues.apache.org/jira/browse/KYLIN-5038?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

star_dev updated KYLIN-5038:
----------------------------
    Attachment: KYLIN疑问.sql

> Kylin每日增量构建(etl_dt),当分区字段etl_dt的条件值非固定值(如20210101),而是变量值时(如REPLACE ( cast( 
> TIMESTAMPADD( DAY,- 31, CURRENT_DATE ) AS VARCHAR ), '-', '' )),扫全表
> ----------------------------------------------------------------------------------------------------------------------------------------------
>
>                 Key: KYLIN-5038
>                 URL: https://issues.apache.org/jira/browse/KYLIN-5038
>             Project: Kylin
>          Issue Type: Bug
>            Reporter: star_dev
>            Priority: Major
>         Attachments: KYLIN疑问.sql
>
>
> 整体验证思路为
> 1. 写case语句,查看相应的执行计划
> 2. case语句执行,看kylin.log找到相应的查询总结
> 3. 分析,改写case语句
>  
> case 1 临时表处理时间变化,join 临时表,where条件中的etl_dt与临时表日期进行判断
> ------------------------ SQL ------------------------
> aaa as (
> select REPLACE ( cast( TIMESTAMPADD( DAY,- 31, CURRENT_DATE ) AS VARCHAR ), 
> '-', '' ) as sdate,
> REPLACE ( cast( TIMESTAMPADD( DAY,- 1, CURRENT_DATE ) AS VARCHAR ), '-', '' ) 
> as edate
> )
> SELECT
>  ETL_DT "日期",
>  sum( CASE DEDUCTION_FLAG WHEN 1 THEN people END ) "扣费用户数",
>  sum( CASE RECHARGE_FLAG WHEN 1 THEN people END ) AS "充值用户数",
>  sum( PAY_MONEY_LOCAL ) "充值金额(税前本地币)" 
> FROM
>  (
>  SELECT
>  ETL_DT,
>  DEDUCTION_FLAG,
>  RECHARGE_FLAG,
>  count( 1 ) AS people,
>  sum( PAY_MONEY_LOCAL ) PAY_MONEY_LOCAL 
>  FROM
>  VIEW_SUBSCRIBER join aaa
> on 1= 1
>  WHERE 1=1
>  and ETL_DT >= aaa.sdate
>  AND ETL_DT <= aaa.edate
>  AND COMPANY_ID = 17 
>  AND LOCAL_ACTIVE_DAY <= '20210514' 
>  AND WCT_FLAG = 0 
>  AND T1_FLAG = 0 
>  AND NDS_FLAG = 0 
>  AND BUSINESS_ID != 3 
>  AND DTH_FLAG = 1 
>  AND DTV_FLAG = 0 
>  GROUP BY
>  ETL_DT,
>  DEDUCTION_FLAG,
>  RECHARGE_FLAG 
>  ) 
> GROUP BY
>  ETL_DT 
> ORDER BY
>  ETL_DT ASC
> ------------------------ SQL 执行计划------------------------
> OLAPToEnumerableConverter OLAPLimitRel(ctx=[], fetch=[50000]) 
> OLAPSortRel(sort0=[$0], dir0=[ASC], ctx=[]) 
> OLAPAggregateRel(group=[\{0}], 扣费用户数=[SUM($1)], 充值用户数=[SUM($2)], 
> 充值金额(税前本地币)=[SUM($3)], ctx=[]) 
> OLAPProjectRel(日期=[$0], $f1=[CASE(=($1, 1), CAST($3):BIGINT, null)], 
> $f2=[CASE(=($2, 1), CAST($3):BIGINT, null)], PAY_MONEY_LOCAL=[$4], ctx=[]) 
> OLAPAggregateRel(group=[\{0, 1, 2}], PEOPLE=[COUNT()], 
> PAY_MONEY_LOCAL=[SUM($3)], ctx=[]) 
> OLAPProjectRel(ETL_DT=[$33], DEDUCTION_FLAG=[$27], RECHARGE_FLAG=[$28], 
> PAY_MONEY_LOCAL=[$34], ctx=[]) 
> OLAPFilterRel(condition=
>  [AND(=(1, 1), 
>  >=($33, $52), 
>  <=($33, $53), 
>  =($3, 17), 
>  <=($32, '20210514'), 
>  =(CAST($30):INTEGER, 0), 
>  =(CAST($29):INTEGER, 0), 
>  =(CAST($31):INTEGER, 0), 
>  <>($10, 3), 
>  =(CAST($19):INTEGER, 1), 
>  =(CAST($20):INTEGER, 0))], ctx=[]) 
> OLAPJoinRel(
>  condition=[true], 
>  joinType=[inner], 
>  ctx=[]) 
> OLAPTableScan(
>  table=[[BOSS_ODS, VIEW_SUBSCRIBER]], ctx=[], fields=[[0, 1, 2, 3, 4, 5, 6, 
> 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 
> 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 
> 46, 47, 48, 49, 50, 51]]) 
> OLAPProjectRel(SDATE=[REPLACE(CAST(DATETIME_PLUS(CURRENT_DATE, *(86400000, 
> -31))):VARCHAR CHARACTER SET "UTF-16LE" COLLATE "UTF-16LE$en_US$primary" NOT 
> NULL, '-', '')], 
>  EDATE=[REPLACE(CAST(DATETIME_PLUS(CURRENT_DATE, *(86400000, -1))):VARCHAR 
> CHARACTER SET "UTF-16LE" COLLATE "UTF-16LE$en_US$primary" NOT NULL, '-', 
> '')], ctx=[]) OLAPValuesRel(tuples=[[\{ 0 }]])
> ------------------------ SQL 查询总结------------------------
> Success: true
> Duration: 0.816
> Project: dvb_ods
> Realization Names: [CUBE[name=view_subscriber]]
> Cuboid Ids: [8586246208]
> Total scan count: 720352
> Total scan bytes: 89324514
> Result row count: 30
> Accept Partial: true
> Is Partial Result: false
> Hit Exception Cache: false
> Storage cache used: false
> Is Query Push-Down: false
> Is Prepare: false
> Trace URL: null
> Message: null
>  
>  
> case 2 临时表处理时间变化,join 临时表,on条件中的etl_dt与临时表日期进行判断  
> ------------------------ SQL ------------------------
> aaa as (
> select REPLACE ( cast( TIMESTAMPADD( DAY,- 31, CURRENT_DATE ) AS VARCHAR ), 
> '-', '' ) as sdate,
> REPLACE ( cast( TIMESTAMPADD( DAY,- 1, CURRENT_DATE ) AS VARCHAR ), '-', '' ) 
> as edate
> )
> SELECT
>  ETL_DT "日期",
>  sum( CASE DEDUCTION_FLAG WHEN 1 THEN people END ) "扣费用户数",
>  sum( CASE RECHARGE_FLAG WHEN 1 THEN people END ) AS "充值用户数",
>  sum( PAY_MONEY_LOCAL ) "充值金额(税前本地币)" 
> FROM
>  (
>  SELECT
>  ETL_DT,
>  DEDUCTION_FLAG,
>  RECHARGE_FLAG,
>  count( 1 ) AS people,
>  sum( PAY_MONEY_LOCAL ) PAY_MONEY_LOCAL 
>  FROM
>  VIEW_SUBSCRIBER join aaa 
>  on ETL_DT >= aaa.sdate AND ETL_DT <= aaa.edate
> WHERE 1=1
>  AND COMPANY_ID = 17 
>  AND LOCAL_ACTIVE_DAY <= '20210514' 
>  AND WCT_FLAG = 0 
>  AND T1_FLAG = 0 
>  AND NDS_FLAG = 0 
>  AND BUSINESS_ID != 3 
>  AND DTH_FLAG = 1 
>  AND DTV_FLAG = 0 
>  GROUP BY
>  ETL_DT,
>  DEDUCTION_FLAG,
>  RECHARGE_FLAG 
>  ) 
> GROUP BY
>  ETL_DT 
> ORDER BY
>  ETL_DT ASC
> ------------------------ SQL 执行计划------------------------
> OLAPToEnumerableConverter OLAPLimitRel(ctx=[], fetch=[50000]) 
> OLAPSortRel(sort0=[$0], dir0=[ASC], ctx=[]) 
> OLAPAggregateRel(group=[\{0}], 扣费用户数=[SUM($1)], 充值用户数=[SUM($2)], 
> 充值金额(税前本地币)=[SUM($3)], ctx=[]) 
> OLAPProjectRel(日期=[$0], $f1=[CASE(=($1, 1), CAST($3):BIGINT, null)], 
> $f2=[CASE(=($2, 1), CAST($3):BIGINT, null)], PAY_MONEY_LOCAL=[$4], ctx=[]) 
> OLAPAggregateRel(group=[\{0, 1, 2}], PEOPLE=[COUNT()], 
> PAY_MONEY_LOCAL=[SUM($3)], ctx=[])
>  OLAPProjectRel(ETL_DT=[$33], DEDUCTION_FLAG=[$27], RECHARGE_FLAG=[$28], 
> PAY_MONEY_LOCAL=[$34], ctx=[]) 
>  OLAPFilterRel(condition=[AND(>=($33, $52), <=($33, $53), =($3, 17), <=($32, 
> '20210514'), =(CAST($30):INTEGER, 0), =(CAST($29):INTEGER, 0), 
> =(CAST($31):INTEGER, 0), <>($10, 3), =(CAST($19):INTEGER, 1), 
> =(CAST($20):INTEGER, 0))], ctx=[]) 
>  OLAPJoinRel(condition=[true], joinType=[inner], ctx=[]) 
>  OLAPTableScan(table=[[BOSS_ODS, VIEW_SUBSCRIBER]], ctx=[], fields=[[0, 1, 2, 
> 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 
> 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 
> 43, 44, 45, 46, 47, 48, 49, 50, 51]]) 
>  OLAPProjectRel(SDATE=[REPLACE(CAST(DATETIME_PLUS(CURRENT_DATE, *(86400000, 
> -31))):VARCHAR CHARACTER SET "UTF-16LE" COLLATE "UTF-16LE$en_US$primary" NOT 
> NULL, '-', '')], 
>  EDATE=[REPLACE(CAST(DATETIME_PLUS(CURRENT_DATE, *(86400000, -1))):VARCHAR 
> CHARACTER SET "UTF-16LE" COLLATE "UTF-16LE$en_US$primary" NOT NULL, '-', 
> '')], ctx=[]) OLAPValuesRel(tuples=[[\{ 0 }]])
> ------------------------ SQL 查询总结------------------------
> Success: true
> Duration: 4.993
> Project: dvb_ods
> Realization Names: [CUBE[name=view_subscriber]]
> Cuboid Ids: [8586246208]
> Total scan count: 720352
> Total scan bytes: 89324514
> Result row count: 30
> Accept Partial: true
> Is Partial Result: false
> Hit Exception Cache: false
> Storage cache used: false
> Is Query Push-Down: false
> Is Prepare: false
> Trace URL: null
> Message: null
>  
>  
> case 3 临时表处理时间变化,join 临时表,但是where的etl_dt直接写死日期,没有扫全表
> ------------------------ SQL ------------------------
> aaa as (
> select REPLACE ( cast( TIMESTAMPADD( DAY,- 31, CURRENT_DATE ) AS VARCHAR ), 
> '-', '' ) as sdate,
> REPLACE ( cast( TIMESTAMPADD( DAY,- 1, CURRENT_DATE ) AS VARCHAR ), '-', '' ) 
> as edate
> )
> SELECT
>  ETL_DT "日期",
>  sum( CASE DEDUCTION_FLAG WHEN 1 THEN people END ) "扣费用户数",
>  sum( CASE RECHARGE_FLAG WHEN 1 THEN people END ) AS "充值用户数",
>  sum( PAY_MONEY_LOCAL ) "充值金额(税前本地币)" 
> FROM
>  (
>  SELECT
>  ETL_DT,
>  DEDUCTION_FLAG,
>  RECHARGE_FLAG,
>  count( 1 ) AS people,
>  sum( PAY_MONEY_LOCAL ) PAY_MONEY_LOCAL 
>  FROM
>  VIEW_SUBSCRIBER join aaa
> on 1= 1
>  WHERE 1=1
>  and ETL_DT >= '20210615'
>  AND ETL_DT <= '20210715'
>  AND COMPANY_ID = 17 
>  AND LOCAL_ACTIVE_DAY <= '20210514' 
>  AND WCT_FLAG = 0 
>  AND T1_FLAG = 0 
>  AND NDS_FLAG = 0 
>  AND BUSINESS_ID != 3 
>  AND DTH_FLAG = 1 
>  AND DTV_FLAG = 0 
>  GROUP BY
>  ETL_DT,
>  DEDUCTION_FLAG,
>  RECHARGE_FLAG 
>  ) 
> GROUP BY
>  ETL_DT 
> ORDER BY
>  ETL_DT ASC
> ------------------------ SQL 执行计划------------------------
> OLAPToEnumerableConverter OLAPLimitRel(ctx=[], fetch=[50000]) 
> OLAPSortRel(sort0=[$0], dir0=[ASC], ctx=[]) 
> OLAPAggregateRel(group=[\{0}], 扣费用户数=[SUM($1)], 充值用户数=[SUM($2)], 
> 充值金额(税前本地币)=[SUM($3)], ctx=[]) 
> OLAPProjectRel(日期=[$0], $f1=[CASE(=($1, 1), CAST($3):BIGINT, null)], 
> $f2=[CASE(=($2, 1), CAST($3):BIGINT, null)], PAY_MONEY_LOCAL=[$4], ctx=[]) 
> OLAPAggregateRel(group=[\{0, 1, 2}], PEOPLE=[COUNT()], 
> PAY_MONEY_LOCAL=[SUM($3)], ctx=[]) OLAPProjectRel(ETL_DT=[$33], 
> DEDUCTION_FLAG=[$27], RECHARGE_FLAG=[$28], PAY_MONEY_LOCAL=[$34], ctx=[]) 
> OLAPFilterRel(condition=[AND(=(1, 1), >=($33, '20210615'), <=($33, 
> '20210715'), =($3, 17), <=($32, '20210514'), =(CAST($30):INTEGER, 0), 
> =(CAST($29):INTEGER, 0), =(CAST($31):INTEGER, 0), <>($10, 3), 
> =(CAST($19):INTEGER, 1), =(CAST($20):INTEGER, 0))], ctx=[]) 
> OLAPJoinRel(condition=[true], joinType=[inner], ctx=[]) 
> OLAPTableScan(table=[[BOSS_ODS, VIEW_SUBSCRIBER]], ctx=[], fields=[[0, 1, 2, 
> 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 
> 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 
> 43, 44, 45, 46, 47, 48, 49, 50, 51]]) 
> OLAPProjectRel(SDATE=[REPLACE(CAST(DATETIME_PLUS(CURRENT_DATE, *(86400000, 
> -31))):VARCHAR CHARACTER SET "UTF-16LE" COLLATE "UTF-16LE$en_US$primary" NOT 
> NULL, '-', '')], EDATE=[REPLACE(CAST(DATETIME_PLUS(CURRENT_DATE, *(86400000, 
> -1))):VARCHAR CHARACTER SET "UTF-16LE" COLLATE "UTF-16LE$en_US$primary" NOT 
> NULL, '-', '')], ctx=[]) OLAPValuesRel(tuples=[[\{ 0 }]])
> ------------------------ SQL 查询总结------------------------
> Success: true
> Duration: 4.712
> Project: dvb_ods
> Realization Names: [CUBE[name=view_subscriber]]
> Cuboid Ids: [8586246208]
> Total scan count: 561581
> Total scan bytes: 69636323
> Result row count: 30
> Accept Partial: true
> Is Partial Result: false
> Hit Exception Cache: false
> Storage cache used: false
> Is Query Push-Down: false
> Is Prepare: false
> Trace URL: null
> Message: null
>  
>  
> case 4 临时表时间直接写死,join 临时表,where条件中的etl_dt与临时表日期进行判断 
> ------------------------ SQL ------------------------
> SQL: with abb as
> (
>  select '20210715' as end_date,'20210615' as start_date )
>  
> select
>  ETL_DT "日期",
>  sum(
>  case
>  DEDUCTION_FLAG
>  when 1 THEN people
>  end
>  ) "扣费用户数",
>  sum(
>  case
>  RECHARGE_FLAG
>  when 1 THEN people
>  end
>  ) as "充值用户数",
>  sum(PAY_MONEY_LOCAL) "充值金额(税前本地币)"
> from
>  (
>  SELECT
>  ETL_DT,
>  DEDUCTION_FLAG,
>  RECHARGE_FLAG,
>  count(1) as people,
>  sum(PAY_MONEY_LOCAL) PAY_MONEY_LOCAL
>  from
>  VIEW_SUBSCRIBER
>  join abb on 1=1
>  where
>  ETL_DT >= start_date 
>  and ETL_DT <= end_date 
>  
>  and COMPANY_ID = 17
>  and LOCAL_ACTIVE_DAY <= '20210514'
>  and WCT_FLAG = 0
>  and T1_FLAG = 0
>  and NDS_FLAG = 0
>  and BUSINESS_ID != 3
>  and DTH_FLAG = 1
>  and DTV_FLAG = 0
>  GROUP by
>  ETL_DT,
>  DEDUCTION_FLAG,
>  RECHARGE_FLAG
>  )
> GROUP by
>  ETL_DT
> ORDER by
>  ETL_DT asc
> ------------------------ SQL 执行计划------------------------
> ------------------------ SQL 查询总结------------------------
> Success: true
> Duration: 0.699
> Project: dvb_ods
> Realization Names: [CUBE[name=view_subscriber]]
> Cuboid Ids: [8586246208]
> Total scan count: 720352
> Total scan bytes: 89324514
> Result row count: 30
> Accept Partial: true
> Is Partial Result: false
> Hit Exception Cache: false
> Storage cache used: false
> Is Query Push-Down: false
> Is Prepare: false
> Trace URL: null
> Message: null
>  



--
This message was sent by Atlassian Jira
(v8.3.4#803005)

Reply via email to