[jira] [Updated] (HIVE-3734) Static partition DML create duplicate files and records

2012-11-26 Thread Gang Tim Liu (JIRA)

 [ 
https://issues.apache.org/jira/browse/HIVE-3734?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Gang Tim Liu updated HIVE-3734:
---

Description: 
Static partition DML creates duplicate files and records.

Given the following test case, Hive returns 2 records:
484 val_484
484 val_484

but srcpart returns one record:
484 val_484

If you look at the file system, the DML generates a duplicate file with the same content:
-rw-r--r-- 1 gang THEFACEBOOK\Domain Users 5812 Nov 21 17:55 00_0
-rwxr-xr-x 1 gang THEFACEBOOK\Domain Users 5812 Nov 21 17:55 01_0


Test Case
===
set hive.mapred.supports.subdirectories=true;
set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;
set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
set hive.merge.mapfiles=false;  
set hive.merge.mapredfiles=false;
set mapred.input.dir.recursive=true;


create table testtable (key String, value String) partitioned by (ds String, hr 
String) ;


explain extended
insert overwrite table testtable partition (ds='2008-04-08', hr='11') select 
key, value from srcpart where ds='2008-04-08';
insert overwrite table testtable partition (ds='2008-04-08', hr='11') select 
key, value from srcpart where ds='2008-04-08';


-- check DML result
desc formatted testtable partition (ds='2008-04-08', hr='11');

select count(1) from srcpart where ds='2008-04-08';
select count(1) from testtable where ds='2008-04-08';

select key, value from srcpart where ds='2008-04-08' and hr='11' and key = 
484;
set hive.optimize.listbucketing=true;
explain extended
select key, value from testtable where ds='2008-04-08' and hr='11' and key = 
484;
select key, value from testtable where ds='2008-04-08' and hr='11' and key = 
484;
===

  was:
Static DML create duplicate files and record.

Given the following test case, hive will return 2 records:
484 val_484
484 val_484

but srcpart returns one record:
484 val_484

If you look at file system, DML generates duplicate file with the same content:
-rw-r--r-- 1 gang THEFACEBOOK\Domain Users 5812 Nov 21 17:55 00_0
-rwxr-xr-x 1 gang THEFACEBOOK\Domain Users 5812 Nov 21 17:55 01_0


Test Case
===
set hive.mapred.supports.subdirectories=true;
set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;
set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
set hive.merge.mapfiles=false;  
set hive.merge.mapredfiles=false;
set mapred.input.dir.recursive=true;


create table testtable (key String, value String) partitioned by (ds String, hr 
String) ;

-- list bucketing DML
explain extended
insert overwrite table testtable partition (ds='2008-04-08', hr='11') select 
key, value from srcpart where ds='2008-04-08';
insert overwrite table testtable partition (ds='2008-04-08', hr='11') select 
key, value from srcpart where ds='2008-04-08';


-- check DML result
desc formatted testtable partition (ds='2008-04-08', hr='11');

select count(1) from srcpart where ds='2008-04-08';
select count(1) from testtable where ds='2008-04-08';

select key, value from srcpart where ds='2008-04-08' and hr='11' and key = 
484;
set hive.optimize.listbucketing=true;
explain extended
select key, value from testtable where ds='2008-04-08' and hr='11' and key = 
484;
select key, value from testtable where ds='2008-04-08' and hr='11' and key = 
484;
===


 Static partition DML create duplicate files and records
 ---

 Key: HIVE-3734
 URL: https://issues.apache.org/jira/browse/HIVE-3734
 Project: Hive
  Issue Type: Bug
  Components: Query Processor
Affects Versions: 0.10.0
Reporter: Gang Tim Liu

 Static DML create duplicate files and record.
 Given the following test case, hive will return 2 records:
 484   val_484
 484   val_484
 but srcpart returns one record:
 484   val_484
 If you look at file system, DML generates duplicate file with the same 
 content:
 -rw-r--r-- 1 gang THEFACEBOOK\Domain Users 5812 Nov 21 17:55 00_0
 -rwxr-xr-x 1 gang THEFACEBOOK\Domain Users 5812 Nov 21 17:55 01_0
 Test Case
 ===
 set hive.mapred.supports.subdirectories=true;
 set hive.exec.dynamic.partition=true;
 set hive.exec.dynamic.partition.mode=nonstrict;
 set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
 set hive.merge.mapfiles=false;  
 set hive.merge.mapredfiles=false;
 set mapred.input.dir.recursive=true;
 create table testtable (key String, value String) partitioned by (ds String, 
 hr String) ;
 explain extended
 insert overwrite table testtable partition (ds='2008-04-08', hr='11') select 
 key, value from srcpart where ds='2008-04-08';
 insert overwrite table testtable partition (ds='2008-04-08', hr='11') select 
 key, value from srcpart where ds='2008-04-08';
 -- check DML result
 desc formatted testtable partition (ds='2008-04-08', hr='11');
 select 

[jira] [Updated] (HIVE-3734) Static partition DML create duplicate files and records

2012-11-26 Thread Gang Tim Liu (JIRA)

 [ 
https://issues.apache.org/jira/browse/HIVE-3734?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Gang Tim Liu updated HIVE-3734:
---

Description: 
Static partition DML creates duplicate files and records.

Given the following test case, Hive returns 2 records:
484 val_484
484 val_484

but srcpart returns one record:
484 val_484

If you look at the file system, the DML generates a duplicate file with the same content:
-rw-r--r-- 1 gang THEFACEBOOK\Domain Users 5812 Nov 21 17:55 00_0
-rwxr-xr-x 1 gang THEFACEBOOK\Domain Users 5812 Nov 21 17:55 01_0


Test Case
===
set hive.mapred.supports.subdirectories=true;
set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;
set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
set hive.merge.mapfiles=false;  
set hive.merge.mapredfiles=false;
set mapred.input.dir.recursive=true;


create table testtable (key String, value String) partitioned by (ds String, hr 
String) ;


explain extended
insert overwrite table testtable partition (ds='2008-04-08', hr='11') select 
key, value from srcpart where ds='2008-04-08';
insert overwrite table testtable partition (ds='2008-04-08', hr='11') select 
key, value from srcpart where ds='2008-04-08';


-- check DML result
desc formatted testtable partition (ds='2008-04-08', hr='11');

select count(1) from srcpart where ds='2008-04-08';
select count(1) from testtable where ds='2008-04-08';

select key, value from srcpart where ds='2008-04-08' and hr='11' and key = 
484;
explain extended
select key, value from testtable where ds='2008-04-08' and hr='11' and key = 
484;
select key, value from testtable where ds='2008-04-08' and hr='11' and key = 
484;
===

  was:
Static DML create duplicate files and record.

Given the following test case, hive will return 2 records:
484 val_484
484 val_484

but srcpart returns one record:
484 val_484

If you look at file system, DML generates duplicate file with the same content:
-rw-r--r-- 1 gang THEFACEBOOK\Domain Users 5812 Nov 21 17:55 00_0
-rwxr-xr-x 1 gang THEFACEBOOK\Domain Users 5812 Nov 21 17:55 01_0


Test Case
===
set hive.mapred.supports.subdirectories=true;
set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;
set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
set hive.merge.mapfiles=false;  
set hive.merge.mapredfiles=false;
set mapred.input.dir.recursive=true;


create table testtable (key String, value String) partitioned by (ds String, hr 
String) ;


explain extended
insert overwrite table testtable partition (ds='2008-04-08', hr='11') select 
key, value from srcpart where ds='2008-04-08';
insert overwrite table testtable partition (ds='2008-04-08', hr='11') select 
key, value from srcpart where ds='2008-04-08';


-- check DML result
desc formatted testtable partition (ds='2008-04-08', hr='11');

select count(1) from srcpart where ds='2008-04-08';
select count(1) from testtable where ds='2008-04-08';

select key, value from srcpart where ds='2008-04-08' and hr='11' and key = 
484;
set hive.optimize.listbucketing=true;
explain extended
select key, value from testtable where ds='2008-04-08' and hr='11' and key = 
484;
select key, value from testtable where ds='2008-04-08' and hr='11' and key = 
484;
===


 Static partition DML create duplicate files and records
 ---

 Key: HIVE-3734
 URL: https://issues.apache.org/jira/browse/HIVE-3734
 Project: Hive
  Issue Type: Bug
  Components: Query Processor
Affects Versions: 0.10.0
Reporter: Gang Tim Liu

 Static DML create duplicate files and record.
 Given the following test case, hive will return 2 records:
 484   val_484
 484   val_484
 but srcpart returns one record:
 484   val_484
 If you look at file system, DML generates duplicate file with the same 
 content:
 -rw-r--r-- 1 gang THEFACEBOOK\Domain Users 5812 Nov 21 17:55 00_0
 -rwxr-xr-x 1 gang THEFACEBOOK\Domain Users 5812 Nov 21 17:55 01_0
 Test Case
 ===
 set hive.mapred.supports.subdirectories=true;
 set hive.exec.dynamic.partition=true;
 set hive.exec.dynamic.partition.mode=nonstrict;
 set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
 set hive.merge.mapfiles=false;  
 set hive.merge.mapredfiles=false;
 set mapred.input.dir.recursive=true;
 create table testtable (key String, value String) partitioned by (ds String, 
 hr String) ;
 explain extended
 insert overwrite table testtable partition (ds='2008-04-08', hr='11') select 
 key, value from srcpart where ds='2008-04-08';
 insert overwrite table testtable partition (ds='2008-04-08', hr='11') select 
 key, value from srcpart where ds='2008-04-08';
 -- check DML result
 desc formatted testtable partition (ds='2008-04-08', hr='11');
 select count(1) from srcpart where ds='2008-04-08';
 select count(1) from 

[jira] [Updated] (HIVE-3734) Static partition DML create duplicate files and records

2012-11-26 Thread Gang Tim Liu (JIRA)

 [ 
https://issues.apache.org/jira/browse/HIVE-3734?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Gang Tim Liu updated HIVE-3734:
---

Description: 
Static partition DML creates duplicate files and records.

Given the following test case, Hive returns 2 records:
484 val_484
484 val_484

but srcpart returns one record:
484 val_484

If you look at the file system, the DML generates a duplicate file with the same content:
-rw-r--r-- 1 gang THEFACEBOOK\Domain Users 5812 Nov 21 17:55 00_0
-rwxr-xr-x 1 gang THEFACEBOOK\Domain Users 5812 Nov 21 17:55 01_0


Test Case
===
set hive.mapred.supports.subdirectories=true;
set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;
set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
set hive.merge.mapfiles=false;
set hive.merge.mapredfiles=false;
set mapred.input.dir.recursive=true;

create table testtable (key String, value String) partitioned by (ds String, hr 
String) ;

explain extended
insert overwrite table testtable partition (ds='2008-04-08', hr='11') select 
key, value from srcpart where ds='2008-04-08';
insert overwrite table testtable partition (ds='2008-04-08', hr='11') select 
key, value from srcpart where ds='2008-04-08';

desc formatted testtable partition (ds='2008-04-08', hr='11');

select count(1) from srcpart where ds='2008-04-08';
select count(1) from testtable where ds='2008-04-08';

select key, value from srcpart where ds='2008-04-08' and hr='11' and key = 
484;
explain extended
select key, value from testtable where ds='2008-04-08' and hr='11' and key = 
484;
select key, value from testtable where ds='2008-04-08' and hr='11' and key = 
484;
===

  was:
Static DML create duplicate files and record.

Given the following test case, hive will return 2 records:
484 val_484
484 val_484

but srcpart returns one record:
484 val_484

If you look at file system, DML generates duplicate file with the same content:
-rw-r--r-- 1 gang THEFACEBOOK\Domain Users 5812 Nov 21 17:55 00_0
-rwxr-xr-x 1 gang THEFACEBOOK\Domain Users 5812 Nov 21 17:55 01_0


Test Case
===
set hive.mapred.supports.subdirectories=true;
set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;
set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
set hive.merge.mapfiles=false;  
set hive.merge.mapredfiles=false;
set mapred.input.dir.recursive=true;


create table testtable (key String, value String) partitioned by (ds String, hr 
String) ;


explain extended
insert overwrite table testtable partition (ds='2008-04-08', hr='11') select 
key, value from srcpart where ds='2008-04-08';
insert overwrite table testtable partition (ds='2008-04-08', hr='11') select 
key, value from srcpart where ds='2008-04-08';


-- check DML result
desc formatted testtable partition (ds='2008-04-08', hr='11');

select count(1) from srcpart where ds='2008-04-08';
select count(1) from testtable where ds='2008-04-08';

select key, value from srcpart where ds='2008-04-08' and hr='11' and key = 
484;
explain extended
select key, value from testtable where ds='2008-04-08' and hr='11' and key = 
484;
select key, value from testtable where ds='2008-04-08' and hr='11' and key = 
484;
===


 Static partition DML create duplicate files and records
 ---

 Key: HIVE-3734
 URL: https://issues.apache.org/jira/browse/HIVE-3734
 Project: Hive
  Issue Type: Bug
  Components: Query Processor
Affects Versions: 0.10.0
Reporter: Gang Tim Liu

 Static DML create duplicate files and record.
 Given the following test case, hive will return 2 records:
 484   val_484
 484   val_484
 but srcpart returns one record:
 484   val_484
 If you look at file system, DML generates duplicate file with the same 
 content:
 -rw-r--r-- 1 gang THEFACEBOOK\Domain Users 5812 Nov 21 17:55 00_0
 -rwxr-xr-x 1 gang THEFACEBOOK\Domain Users 5812 Nov 21 17:55 01_0
 Test Case
 ===
 set hive.mapred.supports.subdirectories=true;
 set hive.exec.dynamic.partition=true;
 set hive.exec.dynamic.partition.mode=nonstrict;
 set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
 set hive.merge.mapfiles=false;
 set hive.merge.mapredfiles=false;
 set mapred.input.dir.recursive=true;
 create table testtable (key String, value String) partitioned by (ds String, 
 hr String) ;
 explain extended
 insert overwrite table testtable partition (ds='2008-04-08', hr='11') select 
 key, value from srcpart where ds='2008-04-08';
 insert overwrite table testtable partition (ds='2008-04-08', hr='11') select 
 key, value from srcpart where ds='2008-04-08';
 desc formatted testtable partition (ds='2008-04-08', hr='11');
 select count(1) from srcpart where ds='2008-04-08';
 select count(1) from testtable where ds='2008-04-08';
 select key, value from srcpart where 

[jira] [Updated] (HIVE-3734) Static partition DML create duplicate files and records

2012-11-21 Thread Gang Tim Liu (JIRA)

 [ 
https://issues.apache.org/jira/browse/HIVE-3734?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Gang Tim Liu updated HIVE-3734:
---

Description: 
Static partition DML creates duplicate files and records.

Given the attached test case, Hive returns 2 records:
484 val_484
484 val_484

but srcpart returns one record:
484 val_484

If you look at the file system, the DML generates a duplicate file with the same content:
-rw-r--r-- 1 gang THEFACEBOOK\Domain Users 5812 Nov 21 17:55 00_0
-rwxr-xr-x 1 gang THEFACEBOOK\Domain Users 5812 Nov 21 17:55 01_0


  was:
Static DML create duplicate files and record.

Given the attached test case, hive will return 2 records:
484 val_484
484 val_484

but srcpart returns one record:
484 val_484

If you look at file system, DML generates duplicate file.



 Static partition DML create duplicate files and records
 ---

 Key: HIVE-3734
 URL: https://issues.apache.org/jira/browse/HIVE-3734
 Project: Hive
  Issue Type: Bug
  Components: Query Processor
Affects Versions: 0.10.0
Reporter: Gang Tim Liu

 Static DML create duplicate files and record.
 Given the attached test case, hive will return 2 records:
 484   val_484
 484   val_484
 but srcpart returns one record:
 484   val_484
 If you look at file system, DML generates duplicate file with the same 
 content:
 -rw-r--r-- 1 gang THEFACEBOOK\Domain Users 5812 Nov 21 17:55 00_0
 -rwxr-xr-x 1 gang THEFACEBOOK\Domain Users 5812 Nov 21 17:55 01_0

--
This message is automatically generated by JIRA.
If you think it was sent incorrectly, please contact your JIRA administrators.
For more information on JIRA, see: http://www.atlassian.com/software/jira


[jira] [Updated] (HIVE-3734) Static partition DML create duplicate files and records

2012-11-21 Thread Gang Tim Liu (JIRA)

 [ 
https://issues.apache.org/jira/browse/HIVE-3734?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Gang Tim Liu updated HIVE-3734:
---

Description: 
Static partition DML creates duplicate files and records.

Given the following test case, Hive returns 2 records:
484 val_484
484 val_484

but srcpart returns one record:
484 val_484

If you look at the file system, the DML generates a duplicate file with the same content:
-rw-r--r-- 1 gang THEFACEBOOK\Domain Users 5812 Nov 21 17:55 00_0
-rwxr-xr-x 1 gang THEFACEBOOK\Domain Users 5812 Nov 21 17:55 01_0


Test Case
===
set hive.mapred.supports.subdirectories=true;
set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;
set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
set hive.merge.mapfiles=false;  
set hive.merge.mapredfiles=false;
set mapred.input.dir.recursive=true;


create table testtable (key String, value String) partitioned by (ds String, hr 
String) ;

-- list bucketing DML
explain extended
insert overwrite table testtable partition (ds='2008-04-08', hr='11') select 
key, value from srcpart where ds='2008-04-08';
insert overwrite table testtable partition (ds='2008-04-08', hr='11') select 
key, value from srcpart where ds='2008-04-08';


-- check DML result
desc formatted testtable partition (ds='2008-04-08', hr='11');

select count(1) from srcpart where ds='2008-04-08';
select count(1) from testtable where ds='2008-04-08';

select key, value from srcpart where ds='2008-04-08' and hr='11' and key = 
484;
set hive.optimize.listbucketing=true;
explain extended
select key, value from testtable where ds='2008-04-08' and hr='11' and key = 
484;
select key, value from testtable where ds='2008-04-08' and hr='11' and key = 
484;
===

  was:
Static DML create duplicate files and record.

Given the attached test case, hive will return 2 records:
484 val_484
484 val_484

but srcpart returns one record:
484 val_484

If you look at file system, DML generates duplicate file with the same content:
-rw-r--r-- 1 gang THEFACEBOOK\Domain Users 5812 Nov 21 17:55 00_0
-rwxr-xr-x 1 gang THEFACEBOOK\Domain Users 5812 Nov 21 17:55 01_0



 Static partition DML create duplicate files and records
 ---

 Key: HIVE-3734
 URL: https://issues.apache.org/jira/browse/HIVE-3734
 Project: Hive
  Issue Type: Bug
  Components: Query Processor
Affects Versions: 0.10.0
Reporter: Gang Tim Liu

 Static DML create duplicate files and record.
 Given the following test case, hive will return 2 records:
 484   val_484
 484   val_484
 but srcpart returns one record:
 484   val_484
 If you look at file system, DML generates duplicate file with the same 
 content:
 -rw-r--r-- 1 gang THEFACEBOOK\Domain Users 5812 Nov 21 17:55 00_0
 -rwxr-xr-x 1 gang THEFACEBOOK\Domain Users 5812 Nov 21 17:55 01_0
 Test Case
 ===
 set hive.mapred.supports.subdirectories=true;
 set hive.exec.dynamic.partition=true;
 set hive.exec.dynamic.partition.mode=nonstrict;
 set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
 set hive.merge.mapfiles=false;  
 set hive.merge.mapredfiles=false;
 set mapred.input.dir.recursive=true;
 create table testtable (key String, value String) partitioned by (ds String, 
 hr String) ;
 -- list bucketing DML
 explain extended
 insert overwrite table testtable partition (ds='2008-04-08', hr='11') select 
 key, value from srcpart where ds='2008-04-08';
 insert overwrite table testtable partition (ds='2008-04-08', hr='11') select 
 key, value from srcpart where ds='2008-04-08';
 -- check DML result
 desc formatted testtable partition (ds='2008-04-08', hr='11');
 select count(1) from srcpart where ds='2008-04-08';
 select count(1) from testtable where ds='2008-04-08';
 select key, value from srcpart where ds='2008-04-08' and hr='11' and key = 
 484;
 set hive.optimize.listbucketing=true;
 explain extended
 select key, value from testtable where ds='2008-04-08' and hr='11' and key = 
 484;
 select key, value from testtable where ds='2008-04-08' and hr='11' and key = 
 484;
 ===

--
This message is automatically generated by JIRA.
If you think it was sent incorrectly, please contact your JIRA administrators.
For more information on JIRA, see: http://www.atlassian.com/software/jira