[ 
https://issues.apache.org/jira/browse/KYLIN-5088?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Guowei Zhu updated KYLIN-5088:
------------------------------
    Description: 
there is hive(hive-3.1.2) table and view as below

 
{code:java}
//代码占位符
create table pageAds2(id int,pageid STRING,adid_list Array<string>) row format 
delimited fields terminated by ',' collection items terminated by '_';

insert into pageAds2 select 1, 'front_page', array('1','2','3');
insert into pageAds2 select 2, 'contact_page', array('3','4','5');

create view pageAds2_view as SELECT pageid, adid FROM pageAds2 LATERAL VIEW 
explode(adid_list) adTable AS adid;
{code}
After building cube of pageAds2_view
{code:java}
//SQL
SELECT
`PAGEADS2_VIEW`.`PAGEID` as `PAGEADS2_VIEW_PAGEID`
,`PAGEADS2_VIEW`.`ADID` as `PAGEADS2_VIEW_ADID`
 FROM `DEFAULT`.`PAGEADS2_VIEW` as `PAGEADS2_VIEW`
WHERE 1=1

// JSON(CUBE)
{
  "uuid": "391cdd5b-dbe9-6877-92fe-38f90ab55314",
  "last_modified": 1631355523311,
  "version": "4.0.0.0",
  "name": "pageAds2_view_cube_v1",
  "is_draft": false,
  "model_name": "pageAds2_view_model_v1",
  "description": "",
  "null_string": null,
  "dimensions": [
    {
      "name": "PAGEID",
      "table": "PAGEADS2_VIEW",
      "column": "PAGEID",
      "derived": null
    },
    {
      "name": "ADID",
      "table": "PAGEADS2_VIEW",
      "column": "ADID",
      "derived": null
    }
  ],
  "measures": [
    {
      "name": "_COUNT_",
      "function": {
        "expression": "COUNT",
        "parameter": {
          "type": "constant",
          "value": "1"
        },
        "returntype": "bigint"
      }
    }
  ],
  "dictionaries": [],
  "rowkey": {
    "rowkey_columns": [
      {
        "column": "PAGEADS2_VIEW.PAGEID",
        "encoding": "dict",
        "encoding_version": 1,
        "isShardBy": false
      },
      {
        "column": "PAGEADS2_VIEW.ADID",
        "encoding": "dict",
        "encoding_version": 1,
        "isShardBy": false
      }
    ]
  },
  "hbase_mapping": {
    "column_family": [
      {
        "name": "F1",
        "columns": [
          {
            "qualifier": "M",
            "measure_refs": [
              "_COUNT_"
            ]
          }
        ]
      }
    ]
  },
  "aggregation_groups": [
    {
      "includes": [
        "PAGEADS2_VIEW.PAGEID",
        "PAGEADS2_VIEW.ADID"
      ],
      "select_rule": {
        "hierarchy_dims": [],
        "mandatory_dims": [],
        "joint_dims": []
      }
    }
  ],
  "signature": "NdkZMCVkiStcS9+r2eEB0A==",
  "notify_list": [],
  "status_need_notify": [
    "ERROR",
    "DISCARDED",
    "SUCCEED"
  ],
  "partition_date_start": 0,
  "partition_date_end": 3153600000000,
  "auto_merge_time_ranges": [
    604800000,
    2419200000
  ],
  "volatile_range": 0,
  "retention_range": 0,
  "engine_type": 6,
  "storage_type": 2,
  "override_kylin_properties": {},
  "cuboid_black_list": [],
  "parent_forward": 3,
  "mandatory_dimension_set_list": [],
  "snapshot_table_desc_list": []
}{code}
in kylin Insight

 

execute below sql
{code:java}
//代码占位符
select ADID , count(*) from PAGEADS2_VIEW group by ADID
{code}
got below result

 
||ADID||EXPR$1||
|1_2_3|1|
|3_4_5|1|

 
  but execute the same sql in hive got below result
||adid||_c1||
|1|1|
|2|1|
|3|2|
|4|1|
|5|1|

 
  

 

 

 

 

 

  was:
there is hive(hive-3.1.2) table and view as below

 
{code:java}
//代码占位符
create table pageAds2(id int,pageid STRING,adid_list Array<string>) row format 
delimited fields terminated by ',' collection items terminated by '_';

insert into pageAds2 select 1, 'front_page', array('1','2','3');
insert into pageAds2 select 2, 'contact_page', array('3','4','5');

create view pageAds2_view as SELECT pageid, adid FROM pageAds2 LATERAL VIEW 
explode(adid_list) adTable AS adid;
{code}
After building cube of pageAds2_view in kylin Insight

execute below sql

 
{code:java}
//代码占位符
select ADID , count(*) from PAGEADS2_VIEW group by ADID
{code}
got below result

 

 
||ADID||EXPR$1||
|1_2_3|1|
|3_4_5|1|

 
 but execute the same sql in hive got below result
||adid||_c1||
|1|1|
|2|1|
|3|2|
|4|1|
|5|1|
 
 

 

 

 

 

 


> kylin4 build cube cannot correct process explode array column
> -------------------------------------------------------------
>
>                 Key: KYLIN-5088
>                 URL: https://issues.apache.org/jira/browse/KYLIN-5088
>             Project: Kylin
>          Issue Type: Bug
>          Components: Spark Engine
>    Affects Versions: v4.0.0
>            Reporter: Guowei Zhu
>            Priority: Major
>
> there is hive(hive-3.1.2) table and view as below
>  
> {code:java}
> //代码占位符
> create table pageAds2(id int,pageid STRING,adid_list Array<string>) row 
> format delimited fields terminated by ',' collection items terminated by '_';
> insert into pageAds2 select 1, 'front_page', array('1','2','3');
> insert into pageAds2 select 2, 'contact_page', array('3','4','5');
> create view pageAds2_view as SELECT pageid, adid FROM pageAds2 LATERAL VIEW 
> explode(adid_list) adTable AS adid;
> {code}
> After building cube of pageAds2_view
> {code:java}
> //SQL
> SELECT
> `PAGEADS2_VIEW`.`PAGEID` as `PAGEADS2_VIEW_PAGEID`
> ,`PAGEADS2_VIEW`.`ADID` as `PAGEADS2_VIEW_ADID`
>  FROM `DEFAULT`.`PAGEADS2_VIEW` as `PAGEADS2_VIEW`
> WHERE 1=1
> // JSON(CUBE)
> {
>   "uuid": "391cdd5b-dbe9-6877-92fe-38f90ab55314",
>   "last_modified": 1631355523311,
>   "version": "4.0.0.0",
>   "name": "pageAds2_view_cube_v1",
>   "is_draft": false,
>   "model_name": "pageAds2_view_model_v1",
>   "description": "",
>   "null_string": null,
>   "dimensions": [
>     {
>       "name": "PAGEID",
>       "table": "PAGEADS2_VIEW",
>       "column": "PAGEID",
>       "derived": null
>     },
>     {
>       "name": "ADID",
>       "table": "PAGEADS2_VIEW",
>       "column": "ADID",
>       "derived": null
>     }
>   ],
>   "measures": [
>     {
>       "name": "_COUNT_",
>       "function": {
>         "expression": "COUNT",
>         "parameter": {
>           "type": "constant",
>           "value": "1"
>         },
>         "returntype": "bigint"
>       }
>     }
>   ],
>   "dictionaries": [],
>   "rowkey": {
>     "rowkey_columns": [
>       {
>         "column": "PAGEADS2_VIEW.PAGEID",
>         "encoding": "dict",
>         "encoding_version": 1,
>         "isShardBy": false
>       },
>       {
>         "column": "PAGEADS2_VIEW.ADID",
>         "encoding": "dict",
>         "encoding_version": 1,
>         "isShardBy": false
>       }
>     ]
>   },
>   "hbase_mapping": {
>     "column_family": [
>       {
>         "name": "F1",
>         "columns": [
>           {
>             "qualifier": "M",
>             "measure_refs": [
>               "_COUNT_"
>             ]
>           }
>         ]
>       }
>     ]
>   },
>   "aggregation_groups": [
>     {
>       "includes": [
>         "PAGEADS2_VIEW.PAGEID",
>         "PAGEADS2_VIEW.ADID"
>       ],
>       "select_rule": {
>         "hierarchy_dims": [],
>         "mandatory_dims": [],
>         "joint_dims": []
>       }
>     }
>   ],
>   "signature": "NdkZMCVkiStcS9+r2eEB0A==",
>   "notify_list": [],
>   "status_need_notify": [
>     "ERROR",
>     "DISCARDED",
>     "SUCCEED"
>   ],
>   "partition_date_start": 0,
>   "partition_date_end": 3153600000000,
>   "auto_merge_time_ranges": [
>     604800000,
>     2419200000
>   ],
>   "volatile_range": 0,
>   "retention_range": 0,
>   "engine_type": 6,
>   "storage_type": 2,
>   "override_kylin_properties": {},
>   "cuboid_black_list": [],
>   "parent_forward": 3,
>   "mandatory_dimension_set_list": [],
>   "snapshot_table_desc_list": []
> }{code}
> in kylin Insight
>  
> execute below sql
> {code:java}
> //代码占位符
> select ADID , count(*) from PAGEADS2_VIEW group by ADID
> {code}
> got below result
>  
> ||ADID||EXPR$1||
> |1_2_3|1|
> |3_4_5|1|
>  
>   but execute the same sql in hive got below result
> ||adid||_c1||
> |1|1|
> |2|1|
> |3|2|
> |4|1|
> |5|1|
>  
>   
>  
>  
>  
>  
>  



--
This message was sent by Atlassian Jira
(v8.3.4#803005)

Reply via email to