[
https://issues.apache.org/jira/browse/KYLIN-5088?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Guowei Zhu updated KYLIN-5088:
------------------------------
Description:
there is hive(hive-3.1.2) table and view as below
{code:java}
//代码占位符
create table pageAds2(id int,pageid STRING,adid_list Array<string>) row format
delimited fields terminated by ',' collection items terminated by '_';
insert into pageAds2 select 1, 'front_page', array('1','2','3');
insert into pageAds2 select 2, 'contact_page', array('3','4','5');
create view pageAds2_view as SELECT pageid, adid FROM pageAds2 LATERAL VIEW
explode(adid_list) adTable AS adid;
{code}
After building cube of pageAds2_view
{code:java}
//SQL
SELECT
`PAGEADS2_VIEW`.`PAGEID` as `PAGEADS2_VIEW_PAGEID`
,`PAGEADS2_VIEW`.`ADID` as `PAGEADS2_VIEW_ADID`
FROM `DEFAULT`.`PAGEADS2_VIEW` as `PAGEADS2_VIEW`
WHERE 1=1
// JSON(CUBE)
{
"uuid": "391cdd5b-dbe9-6877-92fe-38f90ab55314",
"last_modified": 1631355523311,
"version": "4.0.0.0",
"name": "pageAds2_view_cube_v1",
"is_draft": false,
"model_name": "pageAds2_view_model_v1",
"description": "",
"null_string": null,
"dimensions": [
{
"name": "PAGEID",
"table": "PAGEADS2_VIEW",
"column": "PAGEID",
"derived": null
},
{
"name": "ADID",
"table": "PAGEADS2_VIEW",
"column": "ADID",
"derived": null
}
],
"measures": [
{
"name": "_COUNT_",
"function": {
"expression": "COUNT",
"parameter": {
"type": "constant",
"value": "1"
},
"returntype": "bigint"
}
}
],
"dictionaries": [],
"rowkey": {
"rowkey_columns": [
{
"column": "PAGEADS2_VIEW.PAGEID",
"encoding": "dict",
"encoding_version": 1,
"isShardBy": false
},
{
"column": "PAGEADS2_VIEW.ADID",
"encoding": "dict",
"encoding_version": 1,
"isShardBy": false
}
]
},
"hbase_mapping": {
"column_family": [
{
"name": "F1",
"columns": [
{
"qualifier": "M",
"measure_refs": [
"_COUNT_"
]
}
]
}
]
},
"aggregation_groups": [
{
"includes": [
"PAGEADS2_VIEW.PAGEID",
"PAGEADS2_VIEW.ADID"
],
"select_rule": {
"hierarchy_dims": [],
"mandatory_dims": [],
"joint_dims": []
}
}
],
"signature": "NdkZMCVkiStcS9+r2eEB0A==",
"notify_list": [],
"status_need_notify": [
"ERROR",
"DISCARDED",
"SUCCEED"
],
"partition_date_start": 0,
"partition_date_end": 3153600000000,
"auto_merge_time_ranges": [
604800000,
2419200000
],
"volatile_range": 0,
"retention_range": 0,
"engine_type": 6,
"storage_type": 2,
"override_kylin_properties": {},
"cuboid_black_list": [],
"parent_forward": 3,
"mandatory_dimension_set_list": [],
"snapshot_table_desc_list": []
}{code}
in kylin Insight
execute below sql
{code:java}
//代码占位符
select ADID , count(*) from PAGEADS2_VIEW group by ADID
{code}
got below result
||ADID||EXPR$1||
|1_2_3|1|
|3_4_5|1|
but execute the same sql in hive got below result
||adid||_c1||
|1|1|
|2|1|
|3|2|
|4|1|
|5|1|
was:
there is hive(hive-3.1.2) table and view as below
{code:java}
//代码占位符
create table pageAds2(id int,pageid STRING,adid_list Array<string>) row format
delimited fields terminated by ',' collection items terminated by '_';
insert into pageAds2 select 1, 'front_page', array('1','2','3');
insert into pageAds2 select 2, 'contact_page', array('3','4','5');
create view pageAds2_view as SELECT pageid, adid FROM pageAds2 LATERAL VIEW
explode(adid_list) adTable AS adid;
{code}
After building cube of pageAds2_view in kylin Insight
execute below sql
{code:java}
//代码占位符
select ADID , count(*) from PAGEADS2_VIEW group by ADID
{code}
got below result
||ADID||EXPR$1||
|1_2_3|1|
|3_4_5|1|
but execute the same sql in hive got below result
||adid||_c1||
|1|1|
|2|1|
|3|2|
|4|1|
|5|1|
> kylin4 build cube cannot correct process explode array column
> -------------------------------------------------------------
>
> Key: KYLIN-5088
> URL: https://issues.apache.org/jira/browse/KYLIN-5088
> Project: Kylin
> Issue Type: Bug
> Components: Spark Engine
> Affects Versions: v4.0.0
> Reporter: Guowei Zhu
> Priority: Major
>
> there is hive(hive-3.1.2) table and view as below
>
> {code:java}
> //代码占位符
> create table pageAds2(id int,pageid STRING,adid_list Array<string>) row
> format delimited fields terminated by ',' collection items terminated by '_';
> insert into pageAds2 select 1, 'front_page', array('1','2','3');
> insert into pageAds2 select 2, 'contact_page', array('3','4','5');
> create view pageAds2_view as SELECT pageid, adid FROM pageAds2 LATERAL VIEW
> explode(adid_list) adTable AS adid;
> {code}
> After building cube of pageAds2_view
> {code:java}
> //SQL
> SELECT
> `PAGEADS2_VIEW`.`PAGEID` as `PAGEADS2_VIEW_PAGEID`
> ,`PAGEADS2_VIEW`.`ADID` as `PAGEADS2_VIEW_ADID`
> FROM `DEFAULT`.`PAGEADS2_VIEW` as `PAGEADS2_VIEW`
> WHERE 1=1
> // JSON(CUBE)
> {
> "uuid": "391cdd5b-dbe9-6877-92fe-38f90ab55314",
> "last_modified": 1631355523311,
> "version": "4.0.0.0",
> "name": "pageAds2_view_cube_v1",
> "is_draft": false,
> "model_name": "pageAds2_view_model_v1",
> "description": "",
> "null_string": null,
> "dimensions": [
> {
> "name": "PAGEID",
> "table": "PAGEADS2_VIEW",
> "column": "PAGEID",
> "derived": null
> },
> {
> "name": "ADID",
> "table": "PAGEADS2_VIEW",
> "column": "ADID",
> "derived": null
> }
> ],
> "measures": [
> {
> "name": "_COUNT_",
> "function": {
> "expression": "COUNT",
> "parameter": {
> "type": "constant",
> "value": "1"
> },
> "returntype": "bigint"
> }
> }
> ],
> "dictionaries": [],
> "rowkey": {
> "rowkey_columns": [
> {
> "column": "PAGEADS2_VIEW.PAGEID",
> "encoding": "dict",
> "encoding_version": 1,
> "isShardBy": false
> },
> {
> "column": "PAGEADS2_VIEW.ADID",
> "encoding": "dict",
> "encoding_version": 1,
> "isShardBy": false
> }
> ]
> },
> "hbase_mapping": {
> "column_family": [
> {
> "name": "F1",
> "columns": [
> {
> "qualifier": "M",
> "measure_refs": [
> "_COUNT_"
> ]
> }
> ]
> }
> ]
> },
> "aggregation_groups": [
> {
> "includes": [
> "PAGEADS2_VIEW.PAGEID",
> "PAGEADS2_VIEW.ADID"
> ],
> "select_rule": {
> "hierarchy_dims": [],
> "mandatory_dims": [],
> "joint_dims": []
> }
> }
> ],
> "signature": "NdkZMCVkiStcS9+r2eEB0A==",
> "notify_list": [],
> "status_need_notify": [
> "ERROR",
> "DISCARDED",
> "SUCCEED"
> ],
> "partition_date_start": 0,
> "partition_date_end": 3153600000000,
> "auto_merge_time_ranges": [
> 604800000,
> 2419200000
> ],
> "volatile_range": 0,
> "retention_range": 0,
> "engine_type": 6,
> "storage_type": 2,
> "override_kylin_properties": {},
> "cuboid_black_list": [],
> "parent_forward": 3,
> "mandatory_dimension_set_list": [],
> "snapshot_table_desc_list": []
> }{code}
> in kylin Insight
>
> execute below sql
> {code:java}
> //代码占位符
> select ADID , count(*) from PAGEADS2_VIEW group by ADID
> {code}
> got below result
>
> ||ADID||EXPR$1||
> |1_2_3|1|
> |3_4_5|1|
>
> but execute the same sql in hive got below result
> ||adid||_c1||
> |1|1|
> |2|1|
> |3|2|
> |4|1|
> |5|1|
>
>
>
>
>
>
>
--
This message was sent by Atlassian Jira
(v8.3.4#803005)