[ 
https://issues.apache.org/jira/browse/FLINK-32296?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Lim Qing Wei updated FLINK-32296:
---------------------------------
    Description: 
Flink SQL produces incorrect results when the data involves the type ARRAY<ROW>. 
Here's a reproduction:

 

 
{code:java}
CREATE TEMPORARY VIEW bug_data as (
SELECT CAST(ARRAY[
(10, '2020-01-10'), (101, '244ddf'), (1011, '2asdfaf'), (1110, '200'), (2210, 
'20-01-10'), (4410, '21111')
] AS ARRAY<ROW<A INT, B STRING>>)
UNION
SELECT CAST(ARRAY[
(10, '2020-01-10'), (121, '244ddf'), (2222, '2asdfaf'), (32243, '200'), (2210, 
'33333-01-10'), (4410, '23243243')
] AS ARRAY<ROW<A INT, B STRING>>)
UNION SELECT CAST(ARRAY[
(10, '2020-01-10'), (222, '244ddf'), (1011, '2asdfaf'), (1110, '200'), (24367, 
'20-01-10'), (4410, '21111')
] AS ARRAY<ROW<A INT, B STRING>>)
UNION SELECT CAST(ARRAY[
(10, '2020-01-10'), (5666, '244ddf'), (435243, '2asdfaf'), (56567, '200'), 
(2210, '20-01-10'), (4410, '21111')
] AS ARRAY<ROW<A INT, B STRING>>)
UNION SELECT CAST(ARRAY[
(10, '2020-01-10'), (43543, '244ddf'), (1011, '2asdfaf'), (1110, '200'), 
(8967564, '20-01-10'), (4410, '21111')
] AS ARRAY<ROW<A INT, B STRING>>)
);

CREATE TABLE sink (
r ARRAY<ROW<A INT, B STRING>>
) WITH ('connector' = 'print'); {code}
 

 

In all of the versions I've tested (1.15, 1.16 and 1.17), it produces the following:

 
{noformat}
[+I[4410, 21111], +I[4410, 21111], +I[4410, 21111], +I[4410, 21111], +I[4410, 
21111], +I[4410, 21111]]

[+I[4410, 23243243], +I[4410, 23243243], +I[4410, 23243243], +I[4410, 
23243243], +I[4410, 23243243], +I[4410, 23243243]]{noformat}
 

 

I think this is unexpected/wrong because:
 # The query should produce 5 rows, not 2
 # The data is also wrong: every row in each output array is the same, even 
though the rows in the input arrays are not.

 

  was:
FlinkSQL produce incorrect result when involving data with type of ARRAY<ROW>, 
here's a reproduction:

 

 
{code:java}
CREATE TEMPORARY VIEW bug_data as (
SELECT CAST(ARRAY[
(10, '2020-01-10'), (101, '244ddf'), (1011, '2asdfaf'), (1110, '200'), (2210, 
'20-01-10'), (4410, '21111')
] AS ARRAY<ROW<A INT, B STRING>>)
UNION
SELECT CAST(ARRAY[
(10, '2020-01-10'), (121, '244ddf'), (2222, '2asdfaf'), (32243, '200'), (2210, 
'33333-01-10'), (4410, '23243243')
] AS ARRAY<ROW<A INT, B STRING>>)
UNION SELECT CAST(ARRAY[
(10, '2020-01-10'), (222, '244ddf'), (1011, '2asdfaf'), (1110, '200'), (24367, 
'20-01-10'), (4410, '21111')
] AS ARRAY<ROW<A INT, B STRING>>)
UNION SELECT CAST(ARRAY[
(10, '2020-01-10'), (5666, '244ddf'), (435243, '2asdfaf'), (56567, '200'), 
(2210, '20-01-10'), (4410, '21111')
] AS ARRAY<ROW<A INT, B STRING>>)
UNION SELECT CAST(ARRAY[
(10, '2020-01-10'), (43543, '244ddf'), (1011, '2asdfaf'), (1110, '200'), 
(8967564, '20-01-10'), (4410, '21111')
] AS ARRAY<ROW<A INT, B STRING>>)
);

CREATE TABLE sink (
r ARRAY<ROW<A INT, B STRING>>
) WITH ('connector' = 'print'); {code}
 

 

In both 1.15 and 1.16, it produces the following:

 
{noformat}
[+I[4410, 21111], +I[4410, 21111], +I[4410, 21111], +I[4410, 21111], +I[4410, 
21111], +I[4410, 21111]]

[+I[4410, 23243243], +I[4410, 23243243], +I[4410, 23243243], +I[4410, 
23243243], +I[4410, 23243243], +I[4410, 23243243]]{noformat}
 

 

I think this is unexpected/wrong because:
 # The query should produce 5 rows, not 2
 # The data is also wrong, noticed it just make every row in the array the 
same, but the input are not the same.

 


> Flink SQL handle array of row incorrectly
> -----------------------------------------
>
>                 Key: FLINK-32296
>                 URL: https://issues.apache.org/jira/browse/FLINK-32296
>             Project: Flink
>          Issue Type: Bug
>          Components: Table SQL / API
>    Affects Versions: 1.15.3, 1.16.2, 1.17.1
>            Reporter: Lim Qing Wei
>            Priority: Major
>
> Flink SQL produces incorrect results when the data involves the type 
> ARRAY<ROW>. Here's a reproduction:
>  
>  
> {code:java}
> CREATE TEMPORARY VIEW bug_data as (
> SELECT CAST(ARRAY[
> (10, '2020-01-10'), (101, '244ddf'), (1011, '2asdfaf'), (1110, '200'), (2210, 
> '20-01-10'), (4410, '21111')
> ] AS ARRAY<ROW<A INT, B STRING>>)
> UNION
> SELECT CAST(ARRAY[
> (10, '2020-01-10'), (121, '244ddf'), (2222, '2asdfaf'), (32243, '200'), 
> (2210, '33333-01-10'), (4410, '23243243')
> ] AS ARRAY<ROW<A INT, B STRING>>)
> UNION SELECT CAST(ARRAY[
> (10, '2020-01-10'), (222, '244ddf'), (1011, '2asdfaf'), (1110, '200'), 
> (24367, '20-01-10'), (4410, '21111')
> ] AS ARRAY<ROW<A INT, B STRING>>)
> UNION SELECT CAST(ARRAY[
> (10, '2020-01-10'), (5666, '244ddf'), (435243, '2asdfaf'), (56567, '200'), 
> (2210, '20-01-10'), (4410, '21111')
> ] AS ARRAY<ROW<A INT, B STRING>>)
> UNION SELECT CAST(ARRAY[
> (10, '2020-01-10'), (43543, '244ddf'), (1011, '2asdfaf'), (1110, '200'), 
> (8967564, '20-01-10'), (4410, '21111')
> ] AS ARRAY<ROW<A INT, B STRING>>)
> );
> CREATE TABLE sink (
> r ARRAY<ROW<A INT, B STRING>>
> ) WITH ('connector' = 'print'); {code}
>  
>  
> In all of the versions I've tested (1.15, 1.16 and 1.17), it produces the following:
>  
> {noformat}
> [+I[4410, 21111], +I[4410, 21111], +I[4410, 21111], +I[4410, 21111], +I[4410, 
> 21111], +I[4410, 21111]]
> [+I[4410, 23243243], +I[4410, 23243243], +I[4410, 23243243], +I[4410, 
> 23243243], +I[4410, 23243243], +I[4410, 23243243]]{noformat}
>  
>  
> I think this is unexpected/wrong because:
>  # The query should produce 5 rows, not 2
>  # The data is also wrong: every row in each output array is the same, even 
> though the rows in the input arrays are not.
>  



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

Reply via email to