[ 
https://issues.apache.org/jira/browse/IMPALA-402?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16661387#comment-16661387
 ] 

Tim Armstrong commented on IMPALA-402:
--------------------------------------

I can't reproduce this on master with either clustered or non-clustered hints. 
I'm not sure which commits fixed it. I think we should add a regression test 
to cover the gap.
{noformat}
[localhost:21000] default> create table t (i int) partitioned by (p int);
Query: create table t (i int) partitioned by (p int)
+-------------------------+
| summary                 |
+-------------------------+
| Table has been created. |
+-------------------------+
Fetched 1 row(s) in 0.03s
[localhost:21000] default> insert into t partition(p) /*+noclustered*/select 
id, cast(rand()*10 as int) from functional.alltypestiny;
Query: insert into t partition(p) /*+noclustered*/select id, cast(rand()*10 as 
int) from functional.alltypestiny
Query submitted at: 2018-10-23 15:20:38 (Coordinator: 
http://tarmstrong-box:25000)
Query progress can be monitored at: 
http://tarmstrong-box:25000/query_plan?query_id=3747f3c955b9784e:534d4ad300000000
Modified 8 row(s) in 0.52s
[localhost:21000] default> show partitions t;
Query: show partitions t
+-------+-------+--------+------+--------------+-------------------+--------+-------------------+---------------------------------------------+
| p     | #Rows | #Files | Size | Bytes Cached | Cache Replication | Format | 
Incremental stats | Location                                    |
+-------+-------+--------+------+--------------+-------------------+--------+-------------------+---------------------------------------------+
| 2     | -1    | 3      | 10B  | NOT CACHED   | NOT CACHED        | TEXT   | 
false             | hdfs://localhost:20500/test-warehouse/t/p=2 |
| 3     | -1    | 1      | 2B   | NOT CACHED   | NOT CACHED        | TEXT   | 
false             | hdfs://localhost:20500/test-warehouse/t/p=3 |
| 6     | -1    | 2      | 4B   | NOT CACHED   | NOT CACHED        | TEXT   | 
false             | hdfs://localhost:20500/test-warehouse/t/p=6 |
| Total | -1    | 6      | 16B  | 0B           |                   |        |   
                |                                             |
+-------+-------+--------+------+--------------+-------------------+--------+-------------------+---------------------------------------------+
Fetched 4 row(s) in 0.00s
{noformat}

{noformat}
[localhost:21000] default> truncate t;
Query: truncate t
Query submitted at: 2018-10-23 15:21:02 (Coordinator: 
http://tarmstrong-box:25000)
Query progress can be monitored at: 
http://tarmstrong-box:25000/query_plan?query_id=3342013d0b1a6fd7:1b052c9f00000000
+---------------------------+
| summary                   |
+---------------------------+
| Table has been truncated. |
+---------------------------+
Fetched 1 row(s) in 0.06s
[localhost:21000] default> insert into t partition(p) /*+clustered*/select id, 
cast(rand()*10 as int) from functional.alltypestiny;
Query: insert into t partition(p) /*+clustered*/select id, cast(rand()*10 as 
int) from functional.alltypestiny
Query submitted at: 2018-10-23 15:21:08 (Coordinator: 
http://tarmstrong-box:25000)
Query progress can be monitored at: 
http://tarmstrong-box:25000/query_plan?query_id=f2446f5739e65fbc:ee638d2100000000
Modified 8 row(s) in 0.41s
[localhost:21000] default> show partitions t;
Query: show partitions t
+-------+-------+--------+------+--------------+-------------------+--------+-------------------+---------------------------------------------+
| p     | #Rows | #Files | Size | Bytes Cached | Cache Replication | Format | 
Incremental stats | Location                                    |
+-------+-------+--------+------+--------------+-------------------+--------+-------------------+---------------------------------------------+
| 0     | -1    | 1      | 2B   | NOT CACHED   | NOT CACHED        | TEXT   | 
false             | hdfs://localhost:20500/test-warehouse/t/p=0 |
| 2     | -1    | 1      | 6B   | NOT CACHED   | NOT CACHED        | TEXT   | 
false             | hdfs://localhost:20500/test-warehouse/t/p=2 |
| 3     | -1    | 1      | 2B   | NOT CACHED   | NOT CACHED        | TEXT   | 
false             | hdfs://localhost:20500/test-warehouse/t/p=3 |
| 4     | -1    | 1      | 2B   | NOT CACHED   | NOT CACHED        | TEXT   | 
false             | hdfs://localhost:20500/test-warehouse/t/p=4 |
| 6     | -1    | 1      | 2B   | NOT CACHED   | NOT CACHED        | TEXT   | 
false             | hdfs://localhost:20500/test-warehouse/t/p=6 |
| 7     | -1    | 1      | 2B   | NOT CACHED   | NOT CACHED        | TEXT   | 
false             | hdfs://localhost:20500/test-warehouse/t/p=7 |
| Total | -1    | 6      | 16B  | 0B           |                   |        |   
                |                                             |
+-------+-------+--------+------+--------------+-------------------+--------+-------------------+---------------------------------------------+
Fetched 7 row(s) in 0.01s

{noformat}

> Add test for dynamic partition expr involving rand()
> ----------------------------------------------------
>
>                 Key: IMPALA-402
>                 URL: https://issues.apache.org/jira/browse/IMPALA-402
>             Project: IMPALA
>          Issue Type: Improvement
>          Components: Infrastructure
>    Affects Versions: Impala 1.0, Impala 2.5.0, Impala 2.6.0, Impala 2.7.0, 
> Impala 2.8.0, Impala 2.9.0
>         Environment: CentOS 6.3
>            Reporter: Benyi Wang
>            Priority: Major
>
> I found two problems:
> * "Insert overwrite table" doesn't clean up the directory (external table)
> {code}
> $ hadoop fs -ls -R /user/benyiw/tmp_abc;
> drwxr-xr-x   - impala supergroup          0 2013-06-06 12:46 
> /user/benyiw/tmp_abc/slot=1
> -rw-r--r--   2 impala supergroup      16088 2013-06-06 12:46 
> /user/benyiw/tmp_abc/slot=1/3456606565886086588--5331466032849119435_641430213_data.0
> -rw-r--r--   2 impala supergroup     100691 2013-06-06 12:46 
> /user/benyiw/tmp_abc/slot=1/3456606565886086588--5331466032849119436_1260163059_data.0
> -rw-r--r--   2 impala supergroup      43875 2013-06-06 12:46 
> /user/benyiw/tmp_abc/slot=1/3456606565886086588--5331466032849119437_929705780_data.0
> drwxr-xr-x   - impala supergroup          0 2013-06-06 12:40 
> /user/benyiw/tmp_abc/slot=2
> -rw-r--r--   2 impala supergroup          8 2013-06-06 12:40 
> /user/benyiw/tmp_abc/slot=2/-8660787917599456385--5527614477985301990_1328141055_data.0
> drwxr-xr-x   - impala supergroup          0 2013-06-06 12:40 
> /user/benyiw/tmp_abc/slot=3
> -rw-r--r--   2 impala supergroup          8 2013-06-06 12:40 
> /user/benyiw/tmp_abc/slot=3/-8660787917599456385--5527614477985301990_501684742_data.0
> drwxr-xr-x   - impala supergroup          0 2013-06-06 12:47 
> /user/benyiw/tmp_abc/slot=b
> -rw-r--r--   2 impala supergroup      16130 2013-06-06 12:47 
> /user/benyiw/tmp_abc/slot=b/705210285518833776--6520969021873146409_792816606_data.0
> -rw-r--r--   2 impala supergroup     100728 2013-06-06 12:47 
> /user/benyiw/tmp_abc/slot=b/705210285518833776--6520969021873146410_157404218_data.0
> -rw-r--r--   2 impala supergroup      43796 2013-06-06 12:47 
> /user/benyiw/tmp_abc/slot=b/705210285518833776--6520969021873146411_157404218_data.0
> {code}
> * When I ran the following queries, all output files are put into the same 
> partition. 
> {code}
> create table tmp_abc (
>   customer_id string,
>   email string
> ) partitioned by (slot string)
> row format delimited fields terminated by '\t' lines terminated by '\n'
> stored as TextFile
> location '/user/benyiw/tmp_abc';
> insert overwrite table tmp_abc partition (slot) select customer_id, email, 
> case when slot1 < 0.10 then "a" when slot1 < 0.70 then "b" else "c" end as 
> slot from ( select customer_id, email, rand() as slot1 from (select 
> customer_id, max(email) as email, sum(case when seg_num >= 0 then 1 else 0 
> end) as included from customers where ( (seg_num in (1) and member = 'Y') or 
> (seg_num = -1) ) and site_key = 'a_site' and coll_def_id = 'everything' group 
> by customer_id having included > 0 ) a ) b
> {code}
> {code}
> $ hadoop fs -ls -R /user/benyiw/tmp_abc;
> drwxr-xr-x   - impala supergroup          0 2013-06-06 13:01 
> /user/benyiw/tmp_abc/slot=a
> -rw-r--r--   2 impala supergroup      16021 2013-06-06 13:01 
> /user/benyiw/tmp_abc/slot=a/-7883266034308591983--7883771993317985492_909811936_data.0
> -rw-r--r--   2 impala supergroup     100713 2013-06-06 13:01 
> /user/benyiw/tmp_abc/slot=a/-7883266034308591983--7883771993317985493_272258764_data.0
> -rw-r--r--   2 impala supergroup      43920 2013-06-06 13:01 
> /user/benyiw/tmp_abc/slot=a/-7883266034308591983--7883771993317985494_272258764_data.0
> {code}



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-all-unsubscr...@impala.apache.org
For additional commands, e-mail: issues-all-h...@impala.apache.org

Reply via email to