[jira] [Updated] (HIVE-28213) Incorrect results after insert-select from similar bucketed source & target table

2024-04-24 Thread Naresh P R (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-28213?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Naresh P R updated HIVE-28213:
--
Description: 
Insert-select is not honoring bucketing if both source & target are bucketed on 
the same column.

eg., 
{code:java}
CREATE EXTERNAL TABLE bucketing_table1 (id INT)
CLUSTERED BY (id)
SORTED BY (id ASC)
INTO 32 BUCKETS stored as textfile;

INSERT INTO TABLE bucketing_table1 VALUES (1), (2), (3), (4), (5);

CREATE EXTERNAL TABLE bucketing_table2 like bucketing_table1;

INSERT INTO TABLE bucketing_table2 select * from bucketing_table1;{code}
id=1 => murmur_hash(1) %32 should go to 29th bucket file.

bucketing_table1 has id=1 at 29th file,

but bucketing_table2 doesn't have the 29th file because insert-select didn't 
honor the bucketing.
{code:java}
SELECT count(*) FROM bucketing_table1 WHERE id = 1;
===
1 //correct result

SELECT count(*) FROM bucketing_table2 WHERE id = 1;   
=== 
0 // incorrect result


select *, INPUT__FILE__NAME from bucketing_table1;
+--++
| bucketing_table1.id  |                 input__file__name                  |
+--++
| 2                    | /bucketing_table1/04_0 |
| 3                    | /bucketing_table1/06_0 |
| 5                    | /bucketing_table1/15_0 |
| 4                    | /bucketing_table1/21_0 |
| 1                    | /bucketing_table1/29_0 |
+--++

select *, INPUT__FILE__NAME from bucketing_table2;
+-++
| bucketing_table2.id  |                 input__file__name                  |
+-++
| 2           | /bucketing_table2/00_0 |
| 3           | /bucketing_table2/01_0 |
| 5           | /bucketing_table2/02_0 |
| 4           | /bucketing_table2/03_0 |
| 1           | /bucketing_table2/04_0 |
+--++{code}
Workaround for read: hive.tez.bucket.pruning=false;

PS: Attaching repro file [^test.q]

  was:
Insert-select is not honoring bucketing if both source & target are bucketed on 
same column.

eg., 
{code:java}
CREATE EXTERNAL TABLE bucketing_table1 (id INT)
CLUSTERED BY (id)
SORTED BY (id ASC)
INTO 32 BUCKETS stored as textfile;

INSERT INTO TABLE bucketing_table1 VALUES (1), (2), (3), (4), (5);

CREATE EXTERNAL TABLE bucketing_table2 like bucketing_table1;

INSERT INTO TABLE bucketing_table2 select * from bucketing_table1;{code}
id=1 => murmur_hash(1) %32 should go to 29th bucket file.

bucketing_table1 has id=1 at 29th file,

but bucketing_table2 doesn't have 29th file because Insert-select dint honor 
the bucketing.
{code:java}
SELECT count(*) FROM bucketing_table1 WHERE id = 1;
===
1 //correct result

SELECT count(*) FROM bucketing_table2 WHERE id = 1;   
=== 
0 // incorrect result


select *, INPUT__FILE__NAME from bucketing_table1;
+--++
| bucketing_table1.id  |                 input__file__name                  |
+--++
| 2                    | /bucketing_table1/04_0 |
| 3                    | /bucketing_table1/06_0 |
| 5                    | /bucketing_table1/15_0 |
| 4                    | /bucketing_table1/21_0 |
| 1                    | /bucketing_table1/29_0 |
+--++

select *, INPUT__FILE__NAME from bucketing_table2;
+-++
| bucketing_table2.id  |                 input__file__name                  |
+-++
| 2           | /bucketing_table2/00_0 |
| 3           | /bucketing_table2/01_0 |
| 5           | /bucketing_table2/02_0 |
| 4           | /bucketing_table2/03_0 |
| 1           | /bucketing_table2/04_0 |
+--++{code}
Query to identify in which bucketFile a particular row should be

 
{code:java}
with t as (select *, murmur_hash(id)%32 as bucket, INPUT__FILE__NAME from 
bucketing_table1)
select id, (case when bucket > 0 then bucket else 32 + bucket end) as 
bucket_number, INPUT__FILE__NAME from t; 
+-+++
| id  | bucket_number  |                 input__file__name                  |
+-+++
| 2   | 4              | /bucketing_table1/04_0 |
| 3   | 6              | /bucketing_table1/06_0 |
| 5   | 15             | 

[jira] [Updated] (HIVE-28213) Incorrect results after insert-select from similar bucketed source & target table

2024-04-24 Thread Naresh P R (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-28213?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Naresh P R updated HIVE-28213:
--
Description: 
Insert-select is not honoring bucketing if both source & target are bucketed on 
same column.

eg., 
{code:java}
CREATE EXTERNAL TABLE bucketing_table1 (id INT)
CLUSTERED BY (id)
SORTED BY (id ASC)
INTO 32 BUCKETS stored as textfile;

INSERT INTO TABLE bucketing_table1 VALUES (1), (2), (3), (4), (5);

CREATE EXTERNAL TABLE bucketing_table2 like bucketing_table1;

INSERT INTO TABLE bucketing_table2 select * from bucketing_table1;{code}
id=1 => murmur_hash(1) %32 should go to 29th bucket file.

bucketing_table1 has id=1 at 29th file,

but bucketing_table2 doesn't have the 29th file because insert-select didn't 
honor the bucketing.
{code:java}
SELECT count(*) FROM bucketing_table1 WHERE id = 1;
===
1 //correct result

SELECT count(*) FROM bucketing_table2 WHERE id = 1;   
=== 
0 // incorrect result


select *, INPUT__FILE__NAME from bucketing_table1;
+--++
| bucketing_table1.id  |                 input__file__name                  |
+--++
| 2                    | /bucketing_table1/04_0 |
| 3                    | /bucketing_table1/06_0 |
| 5                    | /bucketing_table1/15_0 |
| 4                    | /bucketing_table1/21_0 |
| 1                    | /bucketing_table1/29_0 |
+--++

select *, INPUT__FILE__NAME from bucketing_table2;
+-++
| bucketing_table2.id  |                 input__file__name                  |
+-++
| 2           | /bucketing_table2/00_0 |
| 3           | /bucketing_table2/01_0 |
| 5           | /bucketing_table2/02_0 |
| 4           | /bucketing_table2/03_0 |
| 1           | /bucketing_table2/04_0 |
+--++{code}
Query to identify which bucket file a particular row should be in:

 
{code:java}
with t as (select *, murmur_hash(id)%32 as bucket, INPUT__FILE__NAME from 
bucketing_table1)
select id, (case when bucket > 0 then bucket else 32 + bucket end) as 
bucket_number, INPUT__FILE__NAME from t; 
+-+++
| id  | bucket_number  |                 input__file__name                  |
+-+++
| 2   | 4              | /bucketing_table1/04_0 |
| 3   | 6              | /bucketing_table1/06_0 |
| 5   | 15             | /bucketing_table1/15_0 |
| 4   | 21             | /bucketing_table1/21_0 |
| 1   | 29             | /bucketing_table1/29_0 |
+-+++{code}
 

Workaround for read: hive.tez.bucket.pruning=false;

PS: Attaching repro file [^test.q]

  was:
Insert-select is not honoring bucketing if both source & target are bucketed on 
same column.

eg., 
{code:java}
CREATE EXTERNAL TABLE bucketing_table1 (id INT)
CLUSTERED BY (id)
SORTED BY (id ASC)
INTO 32 BUCKETS stored as textfile;

INSERT INTO TABLE bucketing_table1 VALUES (1), (2), (3), (4), (5);

CREATE EXTERNAL TABLE bucketing_table2 like bucketing_table1;

INSERT INTO TABLE bucketing_table2 select * from bucketing_table1;{code}
id=1 => murmur_hash(1) %32 should go to 29th bucket file.

bucketing_table1 has id=1 at 29th file,

but bucketing_table2 doesn't have 29th file because Insert-select dint honor 
the bucketing.
{code:java}
SELECT count(*) FROM bucketing_table1 WHERE id = 1;
===
1 //correct result

SELECT count(*) FROM bucketing_table2 WHERE id = 1;   
=== 
0 // incorrect result


select *, INPUT__FILE__NAME from bucketing_table1;
+--++
| bucketing_table1.id  |                 input__file__name                  |
+--++
| 2                    | /bucketing_table1/04_0 |
| 3                    | /bucketing_table1/06_0 |
| 5                    | /bucketing_table1/15_0 |
| 4                    | /bucketing_table1/21_0 |
| 1                    | /bucketing_table1/29_0 |
+--++

select *, INPUT__FILE__NAME from bucketing_table2;
+-++
| bucketing_table2.id  |                 input__file__name                  |
+-++
| 2           | /bucketing_table2/00_0 |
| 3           | /bucketing_table2/01_0 |
| 5     

[jira] [Updated] (HIVE-28213) Incorrect results after insert-select from similar bucketed source & target table

2024-04-24 Thread Naresh P R (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-28213?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Naresh P R updated HIVE-28213:
--
Description: 
Insert-select is not honoring bucketing if both source & target are bucketed on 
same column.

eg., 
{code:java}
CREATE EXTERNAL TABLE bucketing_table1 (id INT)
CLUSTERED BY (id)
SORTED BY (id ASC)
INTO 32 BUCKETS stored as textfile;

INSERT INTO TABLE bucketing_table1 VALUES (1), (2), (3), (4), (5);

CREATE EXTERNAL TABLE bucketing_table2 like bucketing_table1;

INSERT INTO TABLE bucketing_table2 select * from bucketing_table1;{code}
id=1 => murmur_hash(1) %32 should go to 29th bucket file.

bucketing_table1 has id=1 at 29th file,

but bucketing_table2 doesn't have the 29th file because insert-select didn't 
honor the bucketing.
{code:java}
SELECT count(*) FROM bucketing_table1 WHERE id = 1;
===
1 //correct result

SELECT count(*) FROM bucketing_table2 WHERE id = 1;   
=== 
0 // incorrect result


select *, INPUT__FILE__NAME from bucketing_table1;
+--++
| bucketing_table1.id  |                 input__file__name                  |
+--++
| 2                    | /bucketing_table1/04_0 |
| 3                    | /bucketing_table1/06_0 |
| 5                    | /bucketing_table1/15_0 |
| 4                    | /bucketing_table1/21_0 |
| 1                    | /bucketing_table1/29_0 |
+--++

select *, INPUT__FILE__NAME from bucketing_table2;
+-++
| bucketing_table2.id  |                 input__file__name                  |
+-++
| 2           | /bucketing_table2/00_0 |
| 3           | /bucketing_table2/01_0 |
| 5           | /bucketing_table2/02_0 |
| 4           | /bucketing_table2/03_0 |
| 1           | /bucketing_table2/04_0 |
+--++{code}
Workaround for read: hive.tez.bucket.pruning=false;

PS: Attaching repro file [^test.q]

  was:
Insert-select is not honoring bucketing if both source & target are bucketed on 
same column.

eg., 
{code:java}
CREATE EXTERNAL TABLE bucketing_table1 (id INT)
CLUSTERED BY (id)
SORTED BY (id ASC)
INTO 32 BUCKETS stored as textfile;

INSERT INTO TABLE bucketing_table1 VALUES (1), (2), (3), (4), (5);

CREATE EXTERNAL TABLE bucketing_table2 like bucketing_table1;

INSERT INTO TABLE bucketing_table2 select * from bucketing_table1;{code}
id=1 => murmur_hash(1) %32 should go to 29th bucket file.

bucketing_table1 has id=1 at 29th file,

but bucketing_table2 doesn't have 29th file because Insert-select dint honor 
the bucketing.
{code:java}
SELECT count(*) FROM bucketing_table1 WHERE id = 1;
===
1 //correct result

SELECT count(*) FROM bucketing_table2 WHERE id = 1;  
===
0 // incorrect result{code}
Workaround: hive.tez.bucket.pruning=false;

PS: Attaching repro file [^test.q]


> Incorrect results after insert-select from similar bucketed source & target 
> table
> -
>
> Key: HIVE-28213
> URL: https://issues.apache.org/jira/browse/HIVE-28213
> Project: Hive
>  Issue Type: Bug
>Reporter: Naresh P R
>Priority: Major
> Attachments: test.q
>
>
> Insert-select is not honoring bucketing if both source & target are bucketed 
> on same column.
> eg., 
> {code:java}
> CREATE EXTERNAL TABLE bucketing_table1 (id INT)
> CLUSTERED BY (id)
> SORTED BY (id ASC)
> INTO 32 BUCKETS stored as textfile;
> INSERT INTO TABLE bucketing_table1 VALUES (1), (2), (3), (4), (5);
> CREATE EXTERNAL TABLE bucketing_table2 like bucketing_table1;
> INSERT INTO TABLE bucketing_table2 select * from bucketing_table1;{code}
> id=1 => murmur_hash(1) %32 should go to 29th bucket file.
> bucketing_table1 has id=1 at 29th file,
> but bucketing_table2 doesn't have the 29th file because insert-select didn't 
> honor the bucketing.
> {code:java}
> SELECT count(*) FROM bucketing_table1 WHERE id = 1;
> ===
> 1 //correct result
> SELECT count(*) FROM bucketing_table2 WHERE id = 1;   
> === 
> 0 // incorrect result
> select *, INPUT__FILE__NAME from bucketing_table1;
> +--++
> | bucketing_table1.id  |                 input__file__name                  |
> +--++
> | 2                    | /bucketing_table1/04_0 |
> | 3                    | /bucketing_table1/06_0 |
> | 5                    | /bucketing_table1/15_0 |
> | 4                    | /bucketing_table1/21_0 |
> | 1 

[jira] [Created] (HIVE-28213) Incorrect results after insert-select from similar bucketed source & target table

2024-04-24 Thread Naresh P R (Jira)
Naresh P R created HIVE-28213:
-

 Summary: Incorrect results after insert-select from similar 
bucketed source & target table
 Key: HIVE-28213
 URL: https://issues.apache.org/jira/browse/HIVE-28213
 Project: Hive
  Issue Type: Bug
Reporter: Naresh P R
 Attachments: test.q

Insert-select is not honoring bucketing if both source & target are bucketed on 
same column.

eg., 
{code:java}
CREATE EXTERNAL TABLE bucketing_table1 (id INT)
CLUSTERED BY (id)
SORTED BY (id ASC)
INTO 32 BUCKETS stored as textfile;

INSERT INTO TABLE bucketing_table1 VALUES (1), (2), (3), (4), (5);

CREATE EXTERNAL TABLE bucketing_table2 like bucketing_table1;

INSERT INTO TABLE bucketing_table2 select * from bucketing_table1;{code}
id=1 => murmur_hash(1) %32 should go to 29th bucket file.

bucketing_table1 has id=1 at 29th file,

but bucketing_table2 doesn't have the 29th file because insert-select didn't 
honor the bucketing.
{code:java}
SELECT count(*) FROM bucketing_table1 WHERE id = 1;
===
1 //correct result

SELECT count(*) FROM bucketing_table2 WHERE id = 1;  
===
0 // incorrect result{code}
Workaround: hive.tez.bucket.pruning=false;

PS: Attaching repro file [^test.q]



--
This message was sent by Atlassian Jira
(v8.20.10#820010)


[jira] [Commented] (HIVE-28117) add_months() with output_date_format returning wrong year on leap day

2024-03-14 Thread Naresh P R (Jira)


[ 
https://issues.apache.org/jira/browse/HIVE-28117?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=17827257#comment-17827257
 ] 

Naresh P R commented on HIVE-28117:
---

Can you try your use case with the yyyy-MM format?

e.g.,

select add_months(dt, -2, 'yyyy-MM')

> add_months() with output_date_format returning wrong year on leap day
> -
>
> Key: HIVE-28117
> URL: https://issues.apache.org/jira/browse/HIVE-28117
> Project: Hive
>  Issue Type: Bug
>  Components: Hive
>Affects Versions: 3.1.3
>Reporter: Jesse Petre
>Priority: Minor
> Attachments: 2024-03-11_12-11-11.png
>
>
> I use an output_date_format option on the add_months() function like so:
> {{select add_months(dt, -2, 'YYYY-MM')}}
> On leap day, 2024-02-29, this incorrectly returned 2024-12.  I expected 
> 2023-12.  All other days it works fine, only leap day it gave the wrong 
> result.
>  
> Omitting the output date format will make it calculate the date correctly.  
> Including the output date format gives the wrong result.



--
This message was sent by Atlassian Jira
(v8.20.10#820010)


[jira] [Commented] (HIVE-27965) Table/partition rename takes a long time at PART_COL_STATS for wide tables

2023-12-21 Thread Naresh P R (Jira)


[ 
https://issues.apache.org/jira/browse/HIVE-27965?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=17799649#comment-17799649
 ] 

Naresh P R commented on HIVE-27965:
---

[~zhangbutao] 

Yes, this helps. Thanks for letting me know.

> Table/partition rename takes a long time at PART_COL_STATS for wide tables
> --
>
> Key: HIVE-27965
> URL: https://issues.apache.org/jira/browse/HIVE-27965
> Project: Hive
>  Issue Type: Improvement
>Reporter: Naresh P R
>Priority: Major
>
> Partition table rename gets clogged at PART_COL_STATS for wide tables.
> {code:java}
> CREATE TABLE IF NOT EXISTS `PART_COL_STATS` (
>  ...
>  `DB_NAME` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL,
>  `TABLE_NAME` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL,
>  `PARTITION_NAME` varchar(767) CHARACTER SET latin1 COLLATE latin1_bin NOT 
> NULL, 
>   ...){code}
> Since PART_COL_STATS holds db_name & table_name, incase of table rename, 
> every row in PART_COL_STATS associated with the table should be fetched, 
> stored in memory, delete & re-insert with new db/table/partition name.
>  
> Remove DB_NAME, TABLE_NAME, PARTITION_NAME from PART_COL_STATS, instead use 
> TBL_ID, DB_ID, PART_ID to avoid touching PART_COL_STATS for table/partition 
> renames.
> Also TBL_ID, DB_ID, PART_ID can be used for PART_COL_STATS INDEXING.



--
This message was sent by Atlassian Jira
(v8.20.10#820010)


[jira] [Comment Edited] (HIVE-27964) Support drop stats similar to Impala

2023-12-21 Thread Naresh P R (Jira)


[ 
https://issues.apache.org/jira/browse/HIVE-27964?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=17799522#comment-17799522
 ] 

Naresh P R edited comment on HIVE-27964 at 12/21/23 6:17 PM:
-

Partition table rename gets clogged at PART_COL_STATS for wide tables.
{code:java}
CREATE TABLE IF NOT EXISTS `PART_COL_STATS` (
 ...
 `DB_NAME` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL,
 `TABLE_NAME` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL,
 `PARTITION_NAME` varchar(767) CHARACTER SET latin1 COLLATE latin1_bin NOT 
NULL, 
  ...){code}
Since PART_COL_STATS holds db_name & table_name, in case of a table rename, 
every row in PART_COL_STATS associated with the table has to be fetched, stored 
in memory, deleted & re-inserted with the new db/table/partition name.

Instead, clearing the stats before the rename & recomputing them later would 
help to speed up the process.

 

I just raised another optimization, HIVE-27965, to remove DB_NAME, TABLE_NAME, 
PARTITION_NAME from PART_COL_STATS & use TBL_ID, DB_ID, PART_ID instead, to 
avoid touching PART_COL_STATS for table/partition renames; these IDs can be 
used in indexes as well.


was (Author: nareshpr):
Partition table rename gets clogged at PART_COL_STATS for wide tables.
{code:java}
CREATE TABLE IF NOT EXISTS `PART_COL_STATS` (
 ...
 `DB_NAME` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL,
 `TABLE_NAME` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL,
 `PARTITION_NAME` varchar(767) CHARACTER SET latin1 COLLATE latin1_bin NOT 
NULL, 
  ...){code}
Since PART_COL_STATS holds db_name & table_name, incase of table rename, every 
row in PART_COL_STATS associated with the table should be fetched, stored in 
memory, delete & re-insert with new db/table/partition name.

Instead clearing the stats before rename & computing later would help to speed 
up the process.

 

Another optimization i was about to raise is to remove DB_NAME, TABLE_NAME, 
PARTITION_NAME from PART_COL_STATS & use TBL_ID, DB_ID, PART_ID to avoid 
touching PART_COL_STATS for table/partition renames + can be used in indexes as 
well.

> Support drop stats similar to Impala
> 
>
> Key: HIVE-27964
> URL: https://issues.apache.org/jira/browse/HIVE-27964
> Project: Hive
>  Issue Type: New Feature
>Reporter: Naresh P R
>Priority: Major
>
> Hive should support drop stats similar to impala.
> https://impala.apache.org/docs/build/html/topics/impala_drop_stats.html



--
This message was sent by Atlassian Jira
(v8.20.10#820010)


[jira] [Updated] (HIVE-27965) Table/partition rename takes a long time at PART_COL_STATS for wide tables

2023-12-21 Thread Naresh P R (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-27965?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Naresh P R updated HIVE-27965:
--
Description: 
Partition table rename gets clogged at PART_COL_STATS for wide tables.
{code:java}
CREATE TABLE IF NOT EXISTS `PART_COL_STATS` (
 ...
 `DB_NAME` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL,
 `TABLE_NAME` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL,
 `PARTITION_NAME` varchar(767) CHARACTER SET latin1 COLLATE latin1_bin NOT 
NULL, 
  ...){code}
Since PART_COL_STATS holds db_name & table_name, in case of a table rename, 
every row in PART_COL_STATS associated with the table has to be fetched, stored 
in memory, deleted & re-inserted with the new db/table/partition name.

 

Remove DB_NAME, TABLE_NAME, PARTITION_NAME from PART_COL_STATS and instead use 
TBL_ID, DB_ID, PART_ID to avoid touching PART_COL_STATS for table/partition 
renames.

Also, TBL_ID, DB_ID, PART_ID can be used for indexing PART_COL_STATS.

  was:
Partition table rename gets clogged at PART_COL_STATS for wide tables.
{code:java}
CREATE TABLE IF NOT EXISTS `PART_COL_STATS` (
 ...
 `DB_NAME` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL,
 `TABLE_NAME` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL,
 `PARTITION_NAME` varchar(767) CHARACTER SET latin1 COLLATE latin1_bin NOT 
NULL, 
  ...){code}
Since PART_COL_STATS holds db_name & table_name, incase of table rename, every 
row in PART_COL_STATS associated with the table should be fetched, stored in 
memory, delete & re-insert with new db/table/partition name.

 

Remove DB_NAME, TABLE_NAME, PARTITION_NAME from PART_COL_STATS & use & use 
TBL_ID, DB_ID, PART_ID to avoid touching PART_COL_STATS for table/partition 
renames.

Also TBL_ID, DB_ID, PART_ID can be used for PART_COL_STATS INDEXING.


> Table/partition rename takes a long time at PART_COL_STATS for wide tables
> --
>
> Key: HIVE-27965
> URL: https://issues.apache.org/jira/browse/HIVE-27965
> Project: Hive
>  Issue Type: Improvement
>Reporter: Naresh P R
>Priority: Major
>
> Partition table rename gets clogged at PART_COL_STATS for wide tables.
> {code:java}
> CREATE TABLE IF NOT EXISTS `PART_COL_STATS` (
>  ...
>  `DB_NAME` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL,
>  `TABLE_NAME` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL,
>  `PARTITION_NAME` varchar(767) CHARACTER SET latin1 COLLATE latin1_bin NOT 
> NULL, 
>   ...){code}
> Since PART_COL_STATS holds db_name & table_name, incase of table rename, 
> every row in PART_COL_STATS associated with the table should be fetched, 
> stored in memory, delete & re-insert with new db/table/partition name.
>  
> Remove DB_NAME, TABLE_NAME, PARTITION_NAME from PART_COL_STATS, instead use 
> TBL_ID, DB_ID, PART_ID to avoid touching PART_COL_STATS for table/partition 
> renames.
> Also TBL_ID, DB_ID, PART_ID can be used for PART_COL_STATS INDEXING.



--
This message was sent by Atlassian Jira
(v8.20.10#820010)


[jira] [Updated] (HIVE-27965) Table/partition rename takes a long time at PART_COL_STATS for wide tables

2023-12-21 Thread Naresh P R (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-27965?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Naresh P R updated HIVE-27965:
--
Description: 
Partition table rename gets clogged at PART_COL_STATS for wide tables.
{code:java}
CREATE TABLE IF NOT EXISTS `PART_COL_STATS` (
 ...
 `DB_NAME` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL,
 `TABLE_NAME` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL,
 `PARTITION_NAME` varchar(767) CHARACTER SET latin1 COLLATE latin1_bin NOT 
NULL, 
  ...){code}
Since PART_COL_STATS holds db_name & table_name, incase of table rename, every 
row in PART_COL_STATS associated with the table should be fetched, stored in 
memory, delete & re-insert with new db/table/partition name.

 

Remove DB_NAME, TABLE_NAME, PARTITION_NAME from PART_COL_STATS & use 
TBL_ID, DB_ID, PART_ID to avoid touching PART_COL_STATS for table/partition 
renames.

Also TBL_ID, DB_ID, PART_ID can be used for PART_COL_STATS INDEXING.

  was:
Partition table rename gets clogged at PART_COL_STATS for wide tables.
{code:java}
CREATE TABLE IF NOT EXISTS `PART_COL_STATS` (
 ...
 `DB_NAME` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL,
 `TABLE_NAME` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL,
 `PARTITION_NAME` varchar(767) CHARACTER SET latin1 COLLATE latin1_bin NOT 
NULL, 
  ...){code}
Since PART_COL_STATS holds db_name & table_name, incase of table rename, every 
row in PART_COL_STATS associated with the table should be fetched, stored in 
memory, delete & re-insert with new db/table/partition name.

 

Remove DB_NAME, TABLE_NAME, PARTITION_NAME from PART_COL_STATS & use PART_ID as 
FOREIGN KEY from PARTITIONS to avoid touching PART_COL_STATS for 
table/partition renames.


> Table/partition rename takes a long time at PART_COL_STATS for wide tables
> --
>
> Key: HIVE-27965
> URL: https://issues.apache.org/jira/browse/HIVE-27965
> Project: Hive
>  Issue Type: Improvement
>Reporter: Naresh P R
>Priority: Major
>
> Partition table rename gets clogged at PART_COL_STATS for wide tables.
> {code:java}
> CREATE TABLE IF NOT EXISTS `PART_COL_STATS` (
>  ...
>  `DB_NAME` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL,
>  `TABLE_NAME` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL,
>  `PARTITION_NAME` varchar(767) CHARACTER SET latin1 COLLATE latin1_bin NOT 
> NULL, 
>   ...){code}
> Since PART_COL_STATS holds db_name & table_name, incase of table rename, 
> every row in PART_COL_STATS associated with the table should be fetched, 
> stored in memory, delete & re-insert with new db/table/partition name.
>  
> Remove DB_NAME, TABLE_NAME, PARTITION_NAME from PART_COL_STATS & use & use 
> TBL_ID, DB_ID, PART_ID to avoid touching PART_COL_STATS for table/partition 
> renames.
> Also TBL_ID, DB_ID, PART_ID can be used for PART_COL_STATS INDEXING.



--
This message was sent by Atlassian Jira
(v8.20.10#820010)


[jira] [Comment Edited] (HIVE-27964) Support drop stats similar to Impala

2023-12-21 Thread Naresh P R (Jira)


[ 
https://issues.apache.org/jira/browse/HIVE-27964?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=17799522#comment-17799522
 ] 

Naresh P R edited comment on HIVE-27964 at 12/21/23 6:10 PM:
-

Partition table rename gets clogged at PART_COL_STATS for wide tables.
{code:java}
CREATE TABLE IF NOT EXISTS `PART_COL_STATS` (
 ...
 `DB_NAME` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL,
 `TABLE_NAME` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL,
 `PARTITION_NAME` varchar(767) CHARACTER SET latin1 COLLATE latin1_bin NOT 
NULL, 
  ...){code}
Since PART_COL_STATS holds db_name & table_name, incase of table rename, every 
row in PART_COL_STATS associated with the table should be fetched, stored in 
memory, delete & re-insert with new db/table/partition name.

Instead clearing the stats before rename & computing later would help to speed 
up the process.

 

Another optimization i was about to raise is to remove DB_NAME, TABLE_NAME, 
PARTITION_NAME from PART_COL_STATS & use TBL_ID, DB_ID, PART_ID to avoid 
touching PART_COL_STATS for table/partition renames + can be used in indexes as 
well.


was (Author: nareshpr):
Partition table rename gets clogged at PART_COL_STATS for wide tables.
{code:java}
CREATE TABLE IF NOT EXISTS `PART_COL_STATS` (
 ...
 `DB_NAME` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL,
 `TABLE_NAME` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL,
 `PARTITION_NAME` varchar(767) CHARACTER SET latin1 COLLATE latin1_bin NOT 
NULL, 
  ...){code}
Since PART_COL_STATS holds db_name & table_name, incase of table rename, every 
row in PART_COL_STATS associated with the table should be fetched, stored in 
memory, delete & re-insert with new db/table/partition name.

Instead clearing the stats before rename & computing later would help to speed 
up the process.

 

Another optimization i was about to raise is to remove DB_NAME, TABLE_NAME, 
PARTITION_NAME from PART_COL_STATS & use PART_ID as FOREIGN KEY from PARTITIONS 
to avoid touching PART_COL_STATS for table/partition renames.

> Support drop stats similar to Impala
> 
>
> Key: HIVE-27964
> URL: https://issues.apache.org/jira/browse/HIVE-27964
> Project: Hive
>  Issue Type: New Feature
>Reporter: Naresh P R
>Priority: Major
>
> Hive should support drop stats similar to impala.
> https://impala.apache.org/docs/build/html/topics/impala_drop_stats.html



--
This message was sent by Atlassian Jira
(v8.20.10#820010)


[jira] [Created] (HIVE-27965) Table/partition rename takes a long time at PART_COL_STATS for wide tables

2023-12-21 Thread Naresh P R (Jira)
Naresh P R created HIVE-27965:
-

 Summary: Table/partition rename takes a long time at 
PART_COL_STATS for wide tables
 Key: HIVE-27965
 URL: https://issues.apache.org/jira/browse/HIVE-27965
 Project: Hive
  Issue Type: Improvement
Reporter: Naresh P R


Partition table rename gets clogged at PART_COL_STATS for wide tables.
{code:java}
CREATE TABLE IF NOT EXISTS `PART_COL_STATS` (
 ...
 `DB_NAME` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL,
 `TABLE_NAME` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL,
 `PARTITION_NAME` varchar(767) CHARACTER SET latin1 COLLATE latin1_bin NOT 
NULL, 
  ...){code}
Since PART_COL_STATS holds db_name & table_name, in case of a table rename, 
every row in PART_COL_STATS associated with the table has to be fetched, stored 
in memory, deleted & re-inserted with the new db/table/partition name.

 

Remove DB_NAME, TABLE_NAME, PARTITION_NAME from PART_COL_STATS & use PART_ID as 
FOREIGN KEY from PARTITIONS to avoid touching PART_COL_STATS for 
table/partition renames.



--
This message was sent by Atlassian Jira
(v8.20.10#820010)


[jira] [Comment Edited] (HIVE-27964) Support drop stats similar to Impala

2023-12-21 Thread Naresh P R (Jira)


[ 
https://issues.apache.org/jira/browse/HIVE-27964?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=17799522#comment-17799522
 ] 

Naresh P R edited comment on HIVE-27964 at 12/21/23 5:47 PM:
-

Partition table rename gets clogged at PART_COL_STATS for wide tables.
{code:java}
CREATE TABLE IF NOT EXISTS `PART_COL_STATS` (
 ...
 `DB_NAME` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL,
 `TABLE_NAME` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL,
 `PARTITION_NAME` varchar(767) CHARACTER SET latin1 COLLATE latin1_bin NOT 
NULL, 
  ...){code}
Since PART_COL_STATS holds db_name & table_name, incase of table rename, every 
row in PART_COL_STATS associated with the table should be fetched, stored in 
memory, delete & re-insert with new db/table/partition name.

Instead clearing the stats before rename & computing later would help to speed 
up the process.

 

Another optimization i was about to raise is to remove DB_NAME, TABLE_NAME, 
PARTITION_NAME from PART_COL_STATS & use PART_ID as FOREIGN KEY from PARTITIONS 
to avoid touching PART_COL_STATS for table/partition renames.


was (Author: nareshpr):
Partition table rename gets clogged at PART_COL_STATS for wide tables.
{code:java}
CREATE TABLE IF NOT EXISTS `PART_COL_STATS` (
 ...
 `DB_NAME` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL,
 `TABLE_NAME` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL,
 `PARTITION_NAME` varchar(767) CHARACTER SET latin1 COLLATE latin1_bin NOT 
NULL, 
  ...){code}
Since PART_COL_STATS holds db_name & table_name, incase of table rename, every 
row in PART_COL_STATS associated with the table should be fetched, stored in 
memory, dropped & re-added with new tableName.

Instead clearing the stats before rename & computing later would help to speed 
up the process.

 

Another optimization i was about to raise is to remove DB_NAME, TABLE_NAME, 
PARTITION_NAME from PART_COL_STATS & use PART_ID as FOREIGN KEY from PARTITIONS 
to avoid touching PART_COL_STATS for table/partition renames.

> Support drop stats similar to Impala
> 
>
> Key: HIVE-27964
> URL: https://issues.apache.org/jira/browse/HIVE-27964
> Project: Hive
>  Issue Type: New Feature
>Reporter: Naresh P R
>Priority: Major
>
> Hive should support drop stats similar to impala.
> https://impala.apache.org/docs/build/html/topics/impala_drop_stats.html



--
This message was sent by Atlassian Jira
(v8.20.10#820010)


[jira] [Commented] (HIVE-27964) Support drop stats similar to Impala

2023-12-21 Thread Naresh P R (Jira)


[ 
https://issues.apache.org/jira/browse/HIVE-27964?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=17799522#comment-17799522
 ] 

Naresh P R commented on HIVE-27964:
---

Partition table rename gets clogged at PART_COL_STATS for wide tables.
{code:java}
CREATE TABLE IF NOT EXISTS `PART_COL_STATS` (
 ...
 `DB_NAME` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL,
 `TABLE_NAME` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL,
 `PARTITION_NAME` varchar(767) CHARACTER SET latin1 COLLATE latin1_bin NOT 
NULL, 
  ...){code}
Since PART_COL_STATS holds db_name & table_name, in case of a table rename, every 
row in PART_COL_STATS associated with the table has to be fetched, stored in 
memory, dropped & re-added with the new tableName.

Instead clearing the stats before rename & computing later would help to speed 
up the process.

 

Another optimization i was about to raise is to remove DB_NAME, TABLE_NAME, 
PARTITION_NAME from PART_COL_STATS & use PART_ID as FOREIGN KEY from PARTITIONS 
to avoid touching PART_COL_STATS for table/partition renames.

> Support drop stats similar to Impala
> 
>
> Key: HIVE-27964
> URL: https://issues.apache.org/jira/browse/HIVE-27964
> Project: Hive
>  Issue Type: New Feature
>Reporter: Naresh P R
>Priority: Major
>
> Hive should support drop stats similar to impala.
> https://impala.apache.org/docs/build/html/topics/impala_drop_stats.html



--
This message was sent by Atlassian Jira
(v8.20.10#820010)


[jira] [Created] (HIVE-27964) Support drop stats similar to Impala

2023-12-20 Thread Naresh P R (Jira)
Naresh P R created HIVE-27964:
-

 Summary: Support drop stats similar to Impala
 Key: HIVE-27964
 URL: https://issues.apache.org/jira/browse/HIVE-27964
 Project: Hive
  Issue Type: New Feature
Reporter: Naresh P R


Hive should support DROP STATS, similar to Impala.

https://impala.apache.org/docs/build/html/topics/impala_drop_stats.html



--
This message was sent by Atlassian Jira
(v8.20.10#820010)


[jira] [Commented] (HIVE-27885) Cast decimal from string with space without digits before dot returns NULL

2023-11-24 Thread Naresh P R (Jira)


[ 
https://issues.apache.org/jira/browse/HIVE-27885?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=17789609#comment-17789609
 ] 

Naresh P R commented on HIVE-27885:
---

Thank you [~rameshkumar] & [~ngangam] for the review and commit.

> Cast decimal from string with space without digits before dot returns NULL
> --
>
> Key: HIVE-27885
> URL: https://issues.apache.org/jira/browse/HIVE-27885
> Project: Hive
>  Issue Type: Bug
>Reporter: Naresh P R
>Assignee: Naresh P R
>Priority: Major
>  Labels: pull-request-available
> Fix For: 4.0.0
>
>
> eg.,
> select cast(". " as decimal(8,4))
> {code:java}
> – Expected output
> 0.
> – Actual output
> NULL
> {code}
> select cast("0. " as decimal(8,4))
> {code:java}
> – Actual output
> 0.
> {code}



--
This message was sent by Atlassian Jira
(v8.20.10#820010)


[jira] [Updated] (HIVE-27885) Cast decimal from string with space without digits before dot returns NULL

2023-11-17 Thread Naresh P R (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-27885?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Naresh P R updated HIVE-27885:
--
Description: 
eg.,

select cast(". " as decimal(8,4))

 
{code:java}
– Expected output
0.
– Actual output
NULL
{code}
select cast("0. " as decimal(8,4))

 
{code:java}
– Actual output
0.
{code}
 

> Cast decimal from string with space without digits before dot returns NULL
> --
>
> Key: HIVE-27885
> URL: https://issues.apache.org/jira/browse/HIVE-27885
> Project: Hive
>  Issue Type: Bug
>Reporter: Naresh P R
>Assignee: Naresh P R
>Priority: Major
>
> eg.,
> select cast(". " as decimal(8,4))
>  
> {code:java}
> – Expected output
> 0.
> – Actual output
> NULL
> {code}
> select cast("0. " as decimal(8,4))
>  
> {code:java}
> – Actual output
> 0.
> {code}
>  



--
This message was sent by Atlassian Jira
(v8.20.10#820010)


[jira] [Updated] (HIVE-27885) Cast decimal from string with space without digits before dot returns NULL

2023-11-17 Thread Naresh P R (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-27885?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Naresh P R updated HIVE-27885:
--
Description: 
eg.,

select cast(". " as decimal(8,4))
{code:java}
-- Expected output
0.
-- Actual output
NULL
{code}
select cast("0. " as decimal(8,4))
{code:java}
-- Actual output
0.
{code}

  was:
eg.,

select cast(". " as decimal(8,4))

 
{code:java}
– Expected output
0.
– Actual output
NULL
{code}
select cast("0. " as decimal(8,4))

 
{code:java}
– Actual output
0.
{code}
 


> Cast decimal from string with space without digits before dot returns NULL
> --
>
> Key: HIVE-27885
> URL: https://issues.apache.org/jira/browse/HIVE-27885
> Project: Hive
>  Issue Type: Bug
>Reporter: Naresh P R
>Assignee: Naresh P R
>Priority: Major
>
> eg.,
> select cast(". " as decimal(8,4))
> {code:java}
> – Expected output
> 0.
> – Actual output
> NULL
> {code}
> select cast("0. " as decimal(8,4))
> {code:java}
> – Actual output
> 0.
> {code}



--
This message was sent by Atlassian Jira
(v8.20.10#820010)


[jira] [Updated] (HIVE-27885) Cast decimal from string with space without digits before dot returns NULL

2023-11-17 Thread Naresh P R (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-27885?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Naresh P R updated HIVE-27885:
--
Environment: (was: eg.,

select cast(". " as decimal(8,4))

-- Expected output

0.

-- Actual output

NULL

 

select cast("0. " as decimal(8,4))

-- Actual output

0.)

> Cast decimal from string with space without digits before dot returns NULL
> --
>
> Key: HIVE-27885
> URL: https://issues.apache.org/jira/browse/HIVE-27885
> Project: Hive
>  Issue Type: Bug
>Reporter: Naresh P R
>Assignee: Naresh P R
>Priority: Major
>




--
This message was sent by Atlassian Jira
(v8.20.10#820010)


[jira] [Created] (HIVE-27885) Cast decimal from string with space without digits before dot returns NULL

2023-11-17 Thread Naresh P R (Jira)
Naresh P R created HIVE-27885:
-

 Summary: Cast decimal from string with space without digits before 
dot returns NULL
 Key: HIVE-27885
 URL: https://issues.apache.org/jira/browse/HIVE-27885
 Project: Hive
  Issue Type: Bug
 Environment: eg.,

select cast(". " as decimal(8,4))

-- Expected output

0.

-- Actual output

NULL

 

select cast("0. " as decimal(8,4))

-- Actual output

0.
Reporter: Naresh P R
Assignee: Naresh P R






--
This message was sent by Atlassian Jira
(v8.20.10#820010)


[jira] [Updated] (HIVE-27876) Incorrect query results on tables with ClusterBy & SortBy

2023-11-15 Thread Naresh P R (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-27876?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Naresh P R updated HIVE-27876:
--
Description: 
Repro:

 
{code:java}
create external table test_bucket(age int, name string, dept string) clustered 
by (age, name) sorted by (age asc, name asc) into 2 buckets stored as orc;
insert into test_bucket values (1, 'user1', 'dept1'), ( 2, 'user2' , 'dept2');
insert into test_bucket values (1, 'user1', 'dept1'), ( 2, 'user2' , 'dept2');

//empty wrong results
select age, name, count(*) from test_bucket group by  age, name having count(*) 
> 1; 
+------+-------+------+
| age  | name  | _c2  |
+------+-------+------+
+------+-------+------+

// Workaround
set hive.map.aggr=false;
select age, name, count(*) from test_bucket group by  age, name having count(*) 
> 1; 
+------+--------+------+
| age  |  name  | _c2  |
+------+--------+------+
| 1    | user1  | 2    |
| 2    | user2  | 2    |
+------+--------+------+ {code}
 

 

  was:
Repro:

 
{code:java}
create external table test_bucket(age int, name string, dept string) clustered 
by (age, name) sorted by (age asc, name asc) into 2 buckets stored as orc;
insert into test_bucket values (1, 'user1', 'dept1'), ( 2, 'user2' , 'dept2');
insert into test_bucket values (1, 'user1', 'dept1'), ( 2, 'user2' , 'dept2');

//empty wrong results with default CDP configs
select age, name, count(*) from test_bucket group by  age, name having count(*) 
> 1; 
+--+---+--+
| age  | name  | _c2  |
+--+---+--+
+--+---+--+

// Workaround
set hive.map.aggr=false;
select age, name, count(*) from test_bucket group by  age, name having count(*) 
> 1; 
+--++--+
| age  |  name  | _c2  |
+--++--+
| 1    | user1  | 2    |
| 2    | user2  | 2    |
+--++--+ {code}
 

 


> Incorrect query results on tables with ClusterBy & SortBy
> -
>
> Key: HIVE-27876
> URL: https://issues.apache.org/jira/browse/HIVE-27876
> Project: Hive
>  Issue Type: Bug
>Reporter: Naresh P R
>Priority: Major
>
> Repro:
>  
> {code:java}
> create external table test_bucket(age int, name string, dept string) 
> clustered by (age, name) sorted by (age asc, name asc) into 2 buckets stored 
> as orc;
> insert into test_bucket values (1, 'user1', 'dept1'), ( 2, 'user2' , 'dept2');
> insert into test_bucket values (1, 'user1', 'dept1'), ( 2, 'user2' , 'dept2');
> //empty wrong results
> select age, name, count(*) from test_bucket group by  age, name having 
> count(*) > 1; 
> +--+---+--+
> | age  | name  | _c2  |
> +--+---+--+
> +--+---+--+
> // Workaround
> set hive.map.aggr=false;
> select age, name, count(*) from test_bucket group by  age, name having 
> count(*) > 1; 
> +--++--+
> | age  |  name  | _c2  |
> +--++--+
> | 1    | user1  | 2    |
> | 2    | user2  | 2    |
> +--++--+ {code}
>  
>  



--
This message was sent by Atlassian Jira
(v8.20.10#820010)


[jira] [Created] (HIVE-27876) Incorrect query results on tables with ClusterBy & SortBy

2023-11-15 Thread Naresh P R (Jira)
Naresh P R created HIVE-27876:
-

 Summary: Incorrect query results on tables with ClusterBy & SortBy
 Key: HIVE-27876
 URL: https://issues.apache.org/jira/browse/HIVE-27876
 Project: Hive
  Issue Type: Bug
Reporter: Naresh P R


Repro:

 
{code:java}
create external table test_bucket(age int, name string, dept string) clustered 
by (age, name) sorted by (age asc, name asc) into 2 buckets stored as orc;
insert into test_bucket values (1, 'user1', 'dept1'), ( 2, 'user2' , 'dept2');
insert into test_bucket values (1, 'user1', 'dept1'), ( 2, 'user2' , 'dept2');

//empty wrong results with default CDP configs
select age, name, count(*) from test_bucket group by  age, name having count(*) 
> 1; 
+------+-------+------+
| age  | name  | _c2  |
+------+-------+------+
+------+-------+------+

// Workaround
set hive.map.aggr=false;
select age, name, count(*) from test_bucket group by  age, name having count(*) 
> 1; 
+------+--------+------+
| age  |  name  | _c2  |
+------+--------+------+
| 1    | user1  | 2    |
| 2    | user2  | 2    |
+------+--------+------+ {code}
 

 



--
This message was sent by Atlassian Jira
(v8.20.10#820010)


[jira] [Updated] (HIVE-27428) CTAS fails with SemanticException when join subquery has complex type column and false filter predicate

2023-06-09 Thread Naresh P R (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-27428?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Naresh P R updated HIVE-27428:
--
Description: 
Repro steps:
{code:java}
drop table if exists table1;
drop table if exists table2;

create table table1 (a string, b string);
create table table2 (complex_column 
array<struct<`id`:string, `values`:array<string>>>);

-- CTAS failing query
create table table3 as with t1 as (select * from table1), t2 as (select * from 
table2 where 1=0) select t1.*, t2.* from t1 left join t2;{code}
Exception:
{code:java}
Caused by: org.apache.hadoop.hive.ql.parse.SemanticException: 
CREATE-TABLE-AS-SELECT creates a VOID type, please use CAST to specify the 
type, near field:  t2.complex_column
        at 
org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.deriveFileSinkColTypes(SemanticAnalyzer.java:8171)
 
        at 
org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.deriveFileSinkColTypes(SemanticAnalyzer.java:8129)
 
        at 
org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genFileSinkPlan(SemanticAnalyzer.java:7822)
 
        at 
org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genPostGroupByBodyPlan(SemanticAnalyzer.java:11248)
 
        at 
org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genBodyPlan(SemanticAnalyzer.java:11120)
 
        at 
org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genPlan(SemanticAnalyzer.java:12050)
 
        at 
org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genPlan(SemanticAnalyzer.java:11916)
 
        at 
org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genOPTree(SemanticAnalyzer.java:12730)
 
        at 
org.apache.hadoop.hive.ql.parse.CalcitePlanner.genOPTree(CalcitePlanner.java:722)
 
        at 
org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.analyzeInternal(SemanticAnalyzer.java:12831)
 
        at 
org.apache.hadoop.hive.ql.parse.CalcitePlanner.analyzeInternal(CalcitePlanner.java:442)
 
        at 
org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.analyze(BaseSemanticAnalyzer.java:300)
 
        at org.apache.hadoop.hive.ql.Compiler.analyze(Compiler.java:220) 
        at org.apache.hadoop.hive.ql.Compiler.compile(Compiler.java:105) 
        at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:194)  {code}

  was:
Repro steps:
{code:java}
drop table if exists table1;
drop table if exists table2;

create table table1 (a string, b string);
create table table2 (complex_column  create table table2 (complex_column 
array, values:array);

-- CTAS failing query
create table table3 as with t1 as (select * from table1), t2 as (select * from 
table2 where 1=0) select t1.*, t2.* from t1 left join t2;{code}
Exception:
{code:java}
Caused by: org.apache.hadoop.hive.ql.parse.SemanticException: 
CREATE-TABLE-AS-SELECT creates a VOID type, please use CAST to specify the 
type, near field:  t2.df0rrd_prod_wers_x
        at 
org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.deriveFileSinkColTypes(SemanticAnalyzer.java:8171)
 
        at 
org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.deriveFileSinkColTypes(SemanticAnalyzer.java:8129)
 
        at 
org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genFileSinkPlan(SemanticAnalyzer.java:7822)
 
        at 
org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genPostGroupByBodyPlan(SemanticAnalyzer.java:11248)
 
        at 
org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genBodyPlan(SemanticAnalyzer.java:11120)
 
        at 
org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genPlan(SemanticAnalyzer.java:12050)
 
        at 
org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genPlan(SemanticAnalyzer.java:11916)
 
        at 
org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genOPTree(SemanticAnalyzer.java:12730)
 
        at 
org.apache.hadoop.hive.ql.parse.CalcitePlanner.genOPTree(CalcitePlanner.java:722)
 
        at 
org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.analyzeInternal(SemanticAnalyzer.java:12831)
 
        at 
org.apache.hadoop.hive.ql.parse.CalcitePlanner.analyzeInternal(CalcitePlanner.java:442)
 
        at 
org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.analyze(BaseSemanticAnalyzer.java:300)
 
        at org.apache.hadoop.hive.ql.Compiler.analyze(Compiler.java:220) 
        at org.apache.hadoop.hive.ql.Compiler.compile(Compiler.java:105) 
        at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:194)  {code}


> CTAS fails with SemanticException when join subquery has complex type column 
> and false filter predicate
> ---
>
> Key: HIVE-27428
> URL: https://issues.apache.org/jira/browse/HIVE-27428
> Project: Hive
>  Issue Type: Bug
>Reporter: Naresh P R
>Priority: Major
>
> Repro steps:
> {code:java}
> drop table if exists table1;
> drop table if exists table2;
> create table table1 (a string, b string);
> create table table2 

[jira] [Created] (HIVE-27428) CTAS fails with SemanticException when join subquery has complex type column and false filter predicate

2023-06-09 Thread Naresh P R (Jira)
Naresh P R created HIVE-27428:
-

 Summary: CTAS fails with SemanticException when join subquery has 
complex type column and false filter predicate
 Key: HIVE-27428
 URL: https://issues.apache.org/jira/browse/HIVE-27428
 Project: Hive
  Issue Type: Bug
Reporter: Naresh P R


Repro steps:
{code:java}
drop table if exists table1;
drop table if exists table2;

create table table1 (a string, b string);
create table table2 (complex_column 
array<struct<`id`:string, `values`:array<string>>>);

-- CTAS failing query
create table table3 as with t1 as (select * from table1), t2 as (select * from 
table2 where 1=0) select t1.*, t2.* from t1 left join t2;{code}
Exception:
{code:java}
Caused by: org.apache.hadoop.hive.ql.parse.SemanticException: 
CREATE-TABLE-AS-SELECT creates a VOID type, please use CAST to specify the 
type, near field:  t2.df0rrd_prod_wers_x
        at 
org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.deriveFileSinkColTypes(SemanticAnalyzer.java:8171)
 
        at 
org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.deriveFileSinkColTypes(SemanticAnalyzer.java:8129)
 
        at 
org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genFileSinkPlan(SemanticAnalyzer.java:7822)
 
        at 
org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genPostGroupByBodyPlan(SemanticAnalyzer.java:11248)
 
        at 
org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genBodyPlan(SemanticAnalyzer.java:11120)
 
        at 
org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genPlan(SemanticAnalyzer.java:12050)
 
        at 
org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genPlan(SemanticAnalyzer.java:11916)
 
        at 
org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genOPTree(SemanticAnalyzer.java:12730)
 
        at 
org.apache.hadoop.hive.ql.parse.CalcitePlanner.genOPTree(CalcitePlanner.java:722)
 
        at 
org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.analyzeInternal(SemanticAnalyzer.java:12831)
 
        at 
org.apache.hadoop.hive.ql.parse.CalcitePlanner.analyzeInternal(CalcitePlanner.java:442)
 
        at 
org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.analyze(BaseSemanticAnalyzer.java:300)
 
        at org.apache.hadoop.hive.ql.Compiler.analyze(Compiler.java:220) 
        at org.apache.hadoop.hive.ql.Compiler.compile(Compiler.java:105) 
        at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:194)  {code}



--
This message was sent by Atlassian Jira
(v8.20.10#820010)


[jira] [Created] (HIVE-27280) Query on View fails in SemanticAnalyzer if view select has grouping sets

2023-04-20 Thread Naresh P R (Jira)
Naresh P R created HIVE-27280:
-

 Summary: Query on View fails in SemanticAnalyzer if view select 
has grouping sets
 Key: HIVE-27280
 URL: https://issues.apache.org/jira/browse/HIVE-27280
 Project: Hive
  Issue Type: Bug
Reporter: Naresh P R
 Attachments: test14.q

The view definition is not rewritten with the proper table alias for columns 
used in the GROUPING UDF, causing compilation to fail with the following trace.
{code:java}
java.lang.RuntimeException: Expression in GROUPING function not present in 
GROUP BY
    at 
org.apache.hadoop.hive.ql.parse.SemanticAnalyzer$2.post(SemanticAnalyzer.java:3429)
    at org.antlr.runtime.tree.TreeVisitor.visit(TreeVisitor.java:66)
    at org.antlr.runtime.tree.TreeVisitor.visit(TreeVisitor.java:60)
    at org.antlr.runtime.tree.TreeVisitor.visit(TreeVisitor.java:60)
    at 
org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.rewriteGroupingFunctionAST(SemanticAnalyzer.java:3438)
    at 
org.apache.hadoop.hive.ql.parse.CalcitePlanner$CalcitePlannerAction.internalGenSelectLogicalPlan(CalcitePlanner.java:4743)
    at 
org.apache.hadoop.hive.ql.parse.CalcitePlanner$CalcitePlannerAction.genSelectLogicalPlan(CalcitePlanner.java:4505)
    at 
org.apache.hadoop.hive.ql.parse.CalcitePlanner$CalcitePlannerAction.genLogicalPlan(CalcitePlanner.java:5173)
 {code}
Attached Repro file : [^test14.q]



--
This message was sent by Atlassian Jira
(v8.20.10#820010)


[jira] [Updated] (HIVE-27164) Create Temp Txn Table As Select is failing at tablePath validation

2023-03-21 Thread Naresh P R (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-27164?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Naresh P R updated HIVE-27164:
--
Description: 
After HIVE-25303, every CTAS query calls 
HiveMetaStore$HMSHandler#translate_table_dryrun() to fetch the table location. 
This call fails with the following exception for temp tables if 
MetastoreDefaultTransformer is set.
{code:java}
2023-03-17 16:41:23,390 INFO  
org.apache.hadoop.hive.metastore.MetastoreDefaultTransformer: 
[pool-6-thread-196]: Starting translation for CreateTable for processor 
HMSClient-@localhost with [EXTWRITE, EXTREAD, HIVEBUCKET2, HIVEFULLACIDREAD, 
HIVEFULLACIDWRITE, HIVECACHEINVALIDATE, HIVEMANAGESTATS, 
HIVEMANAGEDINSERTWRITE, HIVEMANAGEDINSERTREAD, HIVESQL, HIVEMQT, 
HIVEONLYMQTWRITE] on table test_temp
2023-03-17 16:41:23,392 ERROR 
org.apache.hadoop.hive.metastore.RetryingHMSHandler: [pool-6-thread-196]: 
MetaException(message:Illegal location for managed table, it has to be within 
database's managed location)
        at 
org.apache.hadoop.hive.metastore.MetastoreDefaultTransformer.validateTablePaths(MetastoreDefaultTransformer.java:886)
        at 
org.apache.hadoop.hive.metastore.MetastoreDefaultTransformer.transformCreateTable(MetastoreDefaultTransformer.java:666)
        at 
org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.translate_table_dryrun(HiveMetaStore.java:2164)
        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) {code}
I am able to repro this issue using attached testcase. [^mm_cttas.q]

 

There are multiple ways to fix this issue:
 * Have temp txn table path under db's managed location path. This will help 
with encryption zone paths as well.
 * Skip location check for temp tables at 
MetastoreDefaultTransformer#validateTablePaths()

  was:
After HIVE-25303, every CTAS goes for  
HiveMetaStore$HMSHandler#translate_table_dryrun() call to fetch table location 
for CTAS queries which fails with following exception for temp tables if 
MetastoreDefaultTransformer is set.
{code:java}
2023-03-17 16:41:23,390 INFO  
org.apache.hadoop.hive.metastore.MetastoreDefaultTransformer: 
[pool-6-thread-196]: Starting translation for CreateTable for processor 
HMSClient-@localhost with [EXTWRITE, EXTREAD, HIVEBUCKET2, HIVEFULLACIDREAD, 
HIVEFULLACIDWRITE, HIVECACHEINVALIDATE, HIVEMANAGESTATS, 
HIVEMANAGEDINSERTWRITE, HIVEMANAGEDINSERTREAD, HIVESQL, HIVEMQT, 
HIVEONLYMQTWRITE] on table test_temp
2023-03-17 16:41:23,392 ERROR 
org.apache.hadoop.hive.metastore.RetryingHMSHandler: [pool-6-thread-196]: 
MetaException(message:Illegal location for managed table, it has to be within 
database's managed location)
        at 
org.apache.hadoop.hive.metastore.MetastoreDefaultTransformer.validateTablePaths(MetastoreDefaultTransformer.java:886)
        at 
org.apache.hadoop.hive.metastore.MetastoreDefaultTransformer.transformCreateTable(MetastoreDefaultTransformer.java:666)
        at 
org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.translate_table_dryrun(HiveMetaStore.java:2164)
        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) {code}
I am able to repro this issue at apache upstream using attached testcase. 
[^mm_cttas.q]

 

There are multiple ways to fix this issue
 * Have temp txn table path under db's managed location path. This will help 
with encryption zone paths as well.
 * Skip location check for temp tables at 
MetastoreDefaultTransformer#validateTablePaths()


> Create Temp Txn Table As Select is failing at tablePath validation
> --
>
> Key: HIVE-27164
> URL: https://issues.apache.org/jira/browse/HIVE-27164
> Project: Hive
>  Issue Type: Bug
>  Components: HiveServer2, Metastore
>Reporter: Naresh P R
>Priority: Major
> Attachments: mm_cttas.q
>
>
> After HIVE-25303, every CTAS goes for  
> HiveMetaStore$HMSHandler#translate_table_dryrun() call to fetch table 
> location for CTAS queries which fails with following exception for temp 
> tables if MetastoreDefaultTransformer is set.
> {code:java}
> 2023-03-17 16:41:23,390 INFO  
> org.apache.hadoop.hive.metastore.MetastoreDefaultTransformer: 
> [pool-6-thread-196]: Starting translation for CreateTable for processor 
> HMSClient-@localhost with [EXTWRITE, EXTREAD, HIVEBUCKET2, HIVEFULLACIDREAD, 
> HIVEFULLACIDWRITE, HIVECACHEINVALIDATE, HIVEMANAGESTATS, 
> HIVEMANAGEDINSERTWRITE, HIVEMANAGEDINSERTREAD, HIVESQL, HIVEMQT, 
> HIVEONLYMQTWRITE] on table test_temp
> 2023-03-17 16:41:23,392 ERROR 
> org.apache.hadoop.hive.metastore.RetryingHMSHandler: [pool-6-thread-196]: 
> MetaException(message:Illegal location for managed table, it has to be within 
> database's managed location)
>         at 
> org.apache.hadoop.hive.metastore.MetastoreDefaultTransformer.validateTablePaths(MetastoreDefaultTransformer.java:886)
> 

[jira] [Updated] (HIVE-27164) Create Temp Txn Table As Select is failing at tablePath validation

2023-03-21 Thread Naresh P R (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-27164?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Naresh P R updated HIVE-27164:
--
Description: 
After HIVE-25303, every CTAS goes for  
HiveMetaStore$HMSHandler#translate_table_dryrun() call to fetch table location 
for CTAS queries which fails with following exception for temp tables if 
MetastoreDefaultTransformer is set.
{code:java}
2023-03-17 16:41:23,390 INFO  
org.apache.hadoop.hive.metastore.MetastoreDefaultTransformer: 
[pool-6-thread-196]: Starting translation for CreateTable for processor 
HMSClient-@localhost with [EXTWRITE, EXTREAD, HIVEBUCKET2, HIVEFULLACIDREAD, 
HIVEFULLACIDWRITE, HIVECACHEINVALIDATE, HIVEMANAGESTATS, 
HIVEMANAGEDINSERTWRITE, HIVEMANAGEDINSERTREAD, HIVESQL, HIVEMQT, 
HIVEONLYMQTWRITE] on table test_temp
2023-03-17 16:41:23,392 ERROR 
org.apache.hadoop.hive.metastore.RetryingHMSHandler: [pool-6-thread-196]: 
MetaException(message:Illegal location for managed table, it has to be within 
database's managed location)
        at 
org.apache.hadoop.hive.metastore.MetastoreDefaultTransformer.validateTablePaths(MetastoreDefaultTransformer.java:886)
        at 
org.apache.hadoop.hive.metastore.MetastoreDefaultTransformer.transformCreateTable(MetastoreDefaultTransformer.java:666)
        at 
org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.translate_table_dryrun(HiveMetaStore.java:2164)
        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) {code}
I am able to repro this issue at apache upstream using attached testcase. 
[^mm_cttas.q]

 

There are multiple ways to fix this issue
 * Have temp txn table path under db's managed location path. This will help 
with encryption zone paths as well.
 * Skip location check for temp tables at 
MetastoreDefaultTransformer#validateTablePaths()

  was:
After HIVE-25303, every CTAS goes for  
HiveMetaStore$HMSHandler#translate_table_dryrun() call to fetch table location 
for CTAS queries which fails with following exception for temp tables if 
MetastoreDefaultTransformer is set.
{code:java}
2023-03-17 16:41:23,390 INFO  
org.apache.hadoop.hive.metastore.MetastoreDefaultTransformer: 
[pool-6-thread-196]: Starting translation for CreateTable for processor 
HMSClient-@localhost with [EXTWRITE, EXTREAD, HIVEBUCKET2, HIVEFULLACIDREAD, 
HIVEFULLACIDWRITE, HIVECACHEINVALIDATE, HIVEMANAGESTATS, 
HIVEMANAGEDINSERTWRITE, HIVEMANAGEDINSERTREAD, HIVESQL, HIVEMQT, 
HIVEONLYMQTWRITE] on table test_temp
2023-03-17 16:41:23,392 ERROR 
org.apache.hadoop.hive.metastore.RetryingHMSHandler: [pool-6-thread-196]: 
MetaException(message:Illegal location for managed table, it has to be within 
database's managed location)
        at 
org.apache.hadoop.hive.metastore.MetastoreDefaultTransformer.validateTablePaths(MetastoreDefaultTransformer.java:886)
        at 
org.apache.hadoop.hive.metastore.MetastoreDefaultTransformer.transformCreateTable(MetastoreDefaultTransformer.java:666)
        at 
org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.translate_table_dryrun(HiveMetaStore.java:2164)
        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) {code}
I am able to repro this issue at apache upstream using attached testcase. 
[^mm_cttas.q]

 

There are multiple ways to fix this issue
 * Have temp txn table path under db's managed location path. This will help 
with encryption zone tables as well.
 * Skip location check for temp tables at 
MetastoreDefaultTransformer#validateTablePaths()


> Create Temp Txn Table As Select is failing at tablePath validation
> --
>
> Key: HIVE-27164
> URL: https://issues.apache.org/jira/browse/HIVE-27164
> Project: Hive
>  Issue Type: Bug
>  Components: HiveServer2, Metastore
>Reporter: Naresh P R
>Priority: Major
> Attachments: mm_cttas.q
>
>
> After HIVE-25303, every CTAS goes for  
> HiveMetaStore$HMSHandler#translate_table_dryrun() call to fetch table 
> location for CTAS queries which fails with following exception for temp 
> tables if MetastoreDefaultTransformer is set.
> {code:java}
> 2023-03-17 16:41:23,390 INFO  
> org.apache.hadoop.hive.metastore.MetastoreDefaultTransformer: 
> [pool-6-thread-196]: Starting translation for CreateTable for processor 
> HMSClient-@localhost with [EXTWRITE, EXTREAD, HIVEBUCKET2, HIVEFULLACIDREAD, 
> HIVEFULLACIDWRITE, HIVECACHEINVALIDATE, HIVEMANAGESTATS, 
> HIVEMANAGEDINSERTWRITE, HIVEMANAGEDINSERTREAD, HIVESQL, HIVEMQT, 
> HIVEONLYMQTWRITE] on table test_temp
> 2023-03-17 16:41:23,392 ERROR 
> org.apache.hadoop.hive.metastore.RetryingHMSHandler: [pool-6-thread-196]: 
> MetaException(message:Illegal location for managed table, it has to be within 
> database's managed location)
>         at 
> 

[jira] [Updated] (HIVE-27164) Create Temp Txn Table As Select is failing at tablePath validation

2023-03-21 Thread Naresh P R (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-27164?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Naresh P R updated HIVE-27164:
--
Attachment: mm_cttas.q

> Create Temp Txn Table As Select is failing at tablePath validation
> --
>
> Key: HIVE-27164
> URL: https://issues.apache.org/jira/browse/HIVE-27164
> Project: Hive
>  Issue Type: Bug
>  Components: HiveServer2, Metastore
>Reporter: Naresh P R
>Priority: Major
> Attachments: mm_cttas.q
>
>
> After HIVE-25303, every CTAS goes for  
> HiveMetaStore$HMSHandler#translate_table_dryrun() call to fetch table 
> location for CTAS queries which fails with following exception for temp 
> tables if MetastoreDefaultTransformer is set.
> {code:java}
> 2023-03-17 16:41:23,390 INFO  
> org.apache.hadoop.hive.metastore.MetastoreDefaultTransformer: 
> [pool-6-thread-196]: Starting translation for CreateTable for processor 
> HMSClient-@localhost with [EXTWRITE, EXTREAD, HIVEBUCKET2, HIVEFULLACIDREAD, 
> HIVEFULLACIDWRITE, HIVECACHEINVALIDATE, HIVEMANAGESTATS, 
> HIVEMANAGEDINSERTWRITE, HIVEMANAGEDINSERTREAD, HIVESQL, HIVEMQT, 
> HIVEONLYMQTWRITE] on table test_temp
> 2023-03-17 16:41:23,392 ERROR 
> org.apache.hadoop.hive.metastore.RetryingHMSHandler: [pool-6-thread-196]: 
> MetaException(message:Illegal location for managed table, it has to be within 
> database's managed location)
>         at 
> org.apache.hadoop.hive.metastore.MetastoreDefaultTransformer.validateTablePaths(MetastoreDefaultTransformer.java:886)
>         at 
> org.apache.hadoop.hive.metastore.MetastoreDefaultTransformer.transformCreateTable(MetastoreDefaultTransformer.java:666)
>         at 
> org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.translate_table_dryrun(HiveMetaStore.java:2164)
>         at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) {code}
> I am able to repro this issue at apache upstream using attached testcase.
>  
> There are multiple ways to fix this issue
>  * Have temp txn table path under db's managed location path. This will help 
> with encryption zone tables as well.
>  * skips location check for temp tables at 
> MetastoreDefaultTransformer#validateTablePaths()



--
This message was sent by Atlassian Jira
(v8.20.10#820010)


[jira] [Updated] (HIVE-27164) Create Temp Txn Table As Select is failing at tablePath validation

2023-03-21 Thread Naresh P R (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-27164?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Naresh P R updated HIVE-27164:
--
Description: 
After HIVE-25303, every CTAS goes for  
HiveMetaStore$HMSHandler#translate_table_dryrun() call to fetch table location 
for CTAS queries which fails with following exception for temp tables if 
MetastoreDefaultTransformer is set.
{code:java}
2023-03-17 16:41:23,390 INFO  
org.apache.hadoop.hive.metastore.MetastoreDefaultTransformer: 
[pool-6-thread-196]: Starting translation for CreateTable for processor 
HMSClient-@localhost with [EXTWRITE, EXTREAD, HIVEBUCKET2, HIVEFULLACIDREAD, 
HIVEFULLACIDWRITE, HIVECACHEINVALIDATE, HIVEMANAGESTATS, 
HIVEMANAGEDINSERTWRITE, HIVEMANAGEDINSERTREAD, HIVESQL, HIVEMQT, 
HIVEONLYMQTWRITE] on table test_temp
2023-03-17 16:41:23,392 ERROR 
org.apache.hadoop.hive.metastore.RetryingHMSHandler: [pool-6-thread-196]: 
MetaException(message:Illegal location for managed table, it has to be within 
database's managed location)
        at 
org.apache.hadoop.hive.metastore.MetastoreDefaultTransformer.validateTablePaths(MetastoreDefaultTransformer.java:886)
        at 
org.apache.hadoop.hive.metastore.MetastoreDefaultTransformer.transformCreateTable(MetastoreDefaultTransformer.java:666)
        at 
org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.translate_table_dryrun(HiveMetaStore.java:2164)
        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) {code}
I am able to repro this issue at apache upstream using attached testcase. 
[^mm_cttas.q]

 

There are multiple ways to fix this issue
 * Have temp txn table path under db's managed location path. This will help 
with encryption zone tables as well.
 * Skip location check for temp tables at 
MetastoreDefaultTransformer#validateTablePaths()

  was:
After HIVE-25303, every CTAS goes for  
HiveMetaStore$HMSHandler#translate_table_dryrun() call to fetch table location 
for CTAS queries which fails with following exception for temp tables if 
MetastoreDefaultTransformer is set.
{code:java}
2023-03-17 16:41:23,390 INFO  
org.apache.hadoop.hive.metastore.MetastoreDefaultTransformer: 
[pool-6-thread-196]: Starting translation for CreateTable for processor 
HMSClient-@localhost with [EXTWRITE, EXTREAD, HIVEBUCKET2, HIVEFULLACIDREAD, 
HIVEFULLACIDWRITE, HIVECACHEINVALIDATE, HIVEMANAGESTATS, 
HIVEMANAGEDINSERTWRITE, HIVEMANAGEDINSERTREAD, HIVESQL, HIVEMQT, 
HIVEONLYMQTWRITE] on table test_temp
2023-03-17 16:41:23,392 ERROR 
org.apache.hadoop.hive.metastore.RetryingHMSHandler: [pool-6-thread-196]: 
MetaException(message:Illegal location for managed table, it has to be within 
database's managed location)
        at 
org.apache.hadoop.hive.metastore.MetastoreDefaultTransformer.validateTablePaths(MetastoreDefaultTransformer.java:886)
        at 
org.apache.hadoop.hive.metastore.MetastoreDefaultTransformer.transformCreateTable(MetastoreDefaultTransformer.java:666)
        at 
org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.translate_table_dryrun(HiveMetaStore.java:2164)
        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) {code}
I am able to repro this issue at apache upstream using attached testcase.

 

There are multiple ways to fix this issue
 * Have temp txn table path under db's managed location path. This will help 
with encryption zone tables as well.
 * skips location check for temp tables at 
MetastoreDefaultTransformer#validateTablePaths()


> Create Temp Txn Table As Select is failing at tablePath validation
> --
>
> Key: HIVE-27164
> URL: https://issues.apache.org/jira/browse/HIVE-27164
> Project: Hive
>  Issue Type: Bug
>  Components: HiveServer2, Metastore
>Reporter: Naresh P R
>Priority: Major
> Attachments: mm_cttas.q
>
>
> After HIVE-25303, every CTAS goes for  
> HiveMetaStore$HMSHandler#translate_table_dryrun() call to fetch table 
> location for CTAS queries which fails with following exception for temp 
> tables if MetastoreDefaultTransformer is set.
> {code:java}
> 2023-03-17 16:41:23,390 INFO  
> org.apache.hadoop.hive.metastore.MetastoreDefaultTransformer: 
> [pool-6-thread-196]: Starting translation for CreateTable for processor 
> HMSClient-@localhost with [EXTWRITE, EXTREAD, HIVEBUCKET2, HIVEFULLACIDREAD, 
> HIVEFULLACIDWRITE, HIVECACHEINVALIDATE, HIVEMANAGESTATS, 
> HIVEMANAGEDINSERTWRITE, HIVEMANAGEDINSERTREAD, HIVESQL, HIVEMQT, 
> HIVEONLYMQTWRITE] on table test_temp
> 2023-03-17 16:41:23,392 ERROR 
> org.apache.hadoop.hive.metastore.RetryingHMSHandler: [pool-6-thread-196]: 
> MetaException(message:Illegal location for managed table, it has to be within 
> database's managed location)
>         at 
> 

[jira] [Updated] (HIVE-27164) Create Temp Txn Table As Select is failing at tablePath validation

2023-03-21 Thread Naresh P R (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-27164?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Naresh P R updated HIVE-27164:
--
Description: 
After HIVE-25303, every CTAS goes for  
HiveMetaStore$HMSHandler#translate_table_dryrun() call to fetch table location 
for CTAS queries which fails with following exception for temp tables if 
MetastoreDefaultTransformer is set.
{code:java}
2023-03-17 16:41:23,390 INFO  
org.apache.hadoop.hive.metastore.MetastoreDefaultTransformer: 
[pool-6-thread-196]: Starting translation for CreateTable for processor 
HMSClient-@localhost with [EXTWRITE, EXTREAD, HIVEBUCKET2, HIVEFULLACIDREAD, 
HIVEFULLACIDWRITE, HIVECACHEINVALIDATE, HIVEMANAGESTATS, 
HIVEMANAGEDINSERTWRITE, HIVEMANAGEDINSERTREAD, HIVESQL, HIVEMQT, 
HIVEONLYMQTWRITE] on table test_temp
2023-03-17 16:41:23,392 ERROR 
org.apache.hadoop.hive.metastore.RetryingHMSHandler: [pool-6-thread-196]: 
MetaException(message:Illegal location for managed table, it has to be within 
database's managed location)
        at 
org.apache.hadoop.hive.metastore.MetastoreDefaultTransformer.validateTablePaths(MetastoreDefaultTransformer.java:886)
        at 
org.apache.hadoop.hive.metastore.MetastoreDefaultTransformer.transformCreateTable(MetastoreDefaultTransformer.java:666)
        at 
org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.translate_table_dryrun(HiveMetaStore.java:2164)
        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) {code}
I am able to repro this issue at apache upstream using attached testcase. 
[^mm_cttas.q]

 

There are multiple ways to fix this issue
 * Have temp txn table path under db's managed location path. This will help 
with encryption zone tables as well.
 * Skip location check for temp tables at 
MetastoreDefaultTransformer#validateTablePaths()

  was:
After HIVE-25303, every CTAS goes for  
HiveMetaStore$HMSHandler#translate_table_dryrun() call to fetch table location 
for CTAS queries which fails with following exception for temp tables if 
MetastoreDefaultTransformer is set.
{code:java}
2023-03-17 16:41:23,390 INFO  
org.apache.hadoop.hive.metastore.MetastoreDefaultTransformer: 
[pool-6-thread-196]: Starting translation for CreateTable for processor 
HMSClient-@localhost with [EXTWRITE, EXTREAD, HIVEBUCKET2, HIVEFULLACIDREAD, 
HIVEFULLACIDWRITE, HIVECACHEINVALIDATE, HIVEMANAGESTATS, 
HIVEMANAGEDINSERTWRITE, HIVEMANAGEDINSERTREAD, HIVESQL, HIVEMQT, 
HIVEONLYMQTWRITE] on table test_temp
2023-03-17 16:41:23,392 ERROR 
org.apache.hadoop.hive.metastore.RetryingHMSHandler: [pool-6-thread-196]: 
MetaException(message:Illegal location for managed table, it has to be within 
database's managed location)
        at 
org.apache.hadoop.hive.metastore.MetastoreDefaultTransformer.validateTablePaths(MetastoreDefaultTransformer.java:886)
        at 
org.apache.hadoop.hive.metastore.MetastoreDefaultTransformer.transformCreateTable(MetastoreDefaultTransformer.java:666)
        at 
org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.translate_table_dryrun(HiveMetaStore.java:2164)
        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) {code}
I am able to repro this issue at apache upstream using attached testcase. 
[^mm_cttas.q]

 

There are multiple ways to fix this issue
 * Have temp txn table path under db's managed location path. This will help 
with encryption zone tables as well.
 * skips location check for temp tables at 
MetastoreDefaultTransformer#validateTablePaths()


> Create Temp Txn Table As Select is failing at tablePath validation
> --
>
> Key: HIVE-27164
> URL: https://issues.apache.org/jira/browse/HIVE-27164
> Project: Hive
>  Issue Type: Bug
>  Components: HiveServer2, Metastore
>Reporter: Naresh P R
>Priority: Major
> Attachments: mm_cttas.q
>
>
> After HIVE-25303, every CTAS goes for  
> HiveMetaStore$HMSHandler#translate_table_dryrun() call to fetch table 
> location for CTAS queries which fails with following exception for temp 
> tables if MetastoreDefaultTransformer is set.
> {code:java}
> 2023-03-17 16:41:23,390 INFO  
> org.apache.hadoop.hive.metastore.MetastoreDefaultTransformer: 
> [pool-6-thread-196]: Starting translation for CreateTable for processor 
> HMSClient-@localhost with [EXTWRITE, EXTREAD, HIVEBUCKET2, HIVEFULLACIDREAD, 
> HIVEFULLACIDWRITE, HIVECACHEINVALIDATE, HIVEMANAGESTATS, 
> HIVEMANAGEDINSERTWRITE, HIVEMANAGEDINSERTREAD, HIVESQL, HIVEMQT, 
> HIVEONLYMQTWRITE] on table test_temp
> 2023-03-17 16:41:23,392 ERROR 
> org.apache.hadoop.hive.metastore.RetryingHMSHandler: [pool-6-thread-196]: 
> MetaException(message:Illegal location for managed table, it has to be within 
> database's managed location)
>         at 
> 

[jira] [Updated] (HIVE-27114) Provide a configurable filter for removing useless properties in Partition objects from listPartitions HMS Calls

2023-02-28 Thread Naresh P R (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-27114?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Naresh P R updated HIVE-27114:
--
Summary: Provide a configurable filter for removing useless properties in 
Partition objects from listPartitions HMS Calls  (was: Provide a configurable 
filter for removing useless properties in Partition objects from getPartitions 
HMS Calls)

> Provide a configurable filter for removing useless properties in Partition 
> objects from listPartitions HMS Calls
> 
>
> Key: HIVE-27114
> URL: https://issues.apache.org/jira/browse/HIVE-27114
> Project: Hive
>  Issue Type: Bug
>Reporter: Naresh P R
>Priority: Major
>
> HMS API calls are throwing following exception because of thrift upgrade
> {code:java}
> org.apache.thrift.transport.TTransportException: MaxMessageSize reached
>         at 
> org.apache.thrift.transport.TEndpointTransport.countConsumedMessageBytes(TEndpointTransport.java:96)
>  
>         at 
> org.apache.thrift.transport.TMemoryInputTransport.read(TMemoryInputTransport.java:97)
>  
>         at 
> org.apache.thrift.transport.TSaslTransport.read(TSaslTransport.java:390) 
>         at 
> org.apache.thrift.transport.TSaslClientTransport.read(TSaslClientTransport.java:39)
>  
>         at 
> org.apache.thrift.transport.TTransport.readAll(TTransport.java:109) 
>         at 
> org.apache.hadoop.hive.metastore.security.TFilterTransport.readAll(TFilterTransport.java:63)
>  
>         at 
> org.apache.thrift.protocol.TBinaryProtocol.readStringBody(TBinaryProtocol.java:417)
>  
>         at 
> org.apache.thrift.protocol.TBinaryProtocol.readString(TBinaryProtocol.java:411)
>  
>         at 
> org.apache.hadoop.hive.metastore.api.Partition$PartitionStandardScheme.read(Partition.java:1286)
>  
>         at 
> org.apache.hadoop.hive.metastore.api.Partition$PartitionStandardScheme.read(Partition.java:1205)
>  
>         at 
> org.apache.hadoop.hive.metastore.api.Partition.read(Partition.java:1062) 
>         at 
> org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$get_partitions_result$get_partitions_resultStandardScheme.read(ThriftHiveMetastore.java)
>  
>         at 
> org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$get_partitions_result$get_partitions_resultStandardScheme.read(ThriftHiveMetastore.java)
>  
>         at 
> org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$get_partitions_result.read(ThriftHiveMetastore.java)
>  
>         at 
> org.apache.thrift.TServiceClient.receiveBase(TServiceClient.java:88) 
>         at 
> org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$Client.recv_get_partitions(ThriftHiveMetastore.java:3290)
>  
>         at 
> org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$Client.get_partitions(ThriftHiveMetastore.java:3275)
>  
>         at 
> org.apache.hadoop.hive.metastore.HiveMetaStoreClient.listPartitions(HiveMetaStoreClient.java:1782)
>  
>         at 
> org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient.listPartitions(SessionHiveMetaStoreClient.java:1134)
>  
>         at 
> org.apache.hadoop.hive.metastore.HiveMetaStoreClient.listPartitions(HiveMetaStoreClient.java:1775)
>  
>         at sun.reflect.GeneratedMethodAccessor169.invoke(Unknown Source) 
> ~[?:?]
>         at 
> sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>  ~[?:1.8.0_311]
>         at java.lang.reflect.Method.invoke(Method.java:498) ~[?:1.8.0_311]
>         at 
> org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.invoke(RetryingMetaStoreClient.java:213)
>  
>         at com.sun.proxy.$Proxy52.listPartitions(Unknown Source) ~[?:?]
>         at sun.reflect.GeneratedMethodAccessor169.invoke(Unknown Source) 
> ~[?:?]
>         at 
> sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>  ~[?:1.8.0_311]
>         at java.lang.reflect.Method.invoke(Method.java:498) ~[?:1.8.0_311]
>         at 
> org.apache.hadoop.hive.metastore.HiveMetaStoreClient$SynchronizedHandler.invoke(HiveMetaStoreClient.java:3550)
>  
>         at com.sun.proxy.$Proxy52.listPartitions(Unknown Source) ~[?:?]
>         at 
> org.apache.hadoop.hive.ql.metadata.Hive.getAllPartitionsOf(Hive.java:3793) 
>         at 
> org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner.getAllPartitions(PartitionPruner.java:485)
>    {code}
> Large size partition metadata is causing this issue
> e.g., Impala stores huge stats chunks in partitionMetadata with {*}param_keys = 
> (impala_intermediate_stats_chunk*){*}; these PARTITION_PARAM_KEYS are not 
> required for Hive. These params should be skipped while preparing partition 
> object from HMS to HS2.
> Similar to HIVE-25501, any user defined regex param_keys should be skipped in 
> listPartitions HMS 

[jira] [Updated] (HIVE-27114) Provide a configurable filter for removing useless properties in Partition objects from getPartitions HMS Calls

2023-02-28 Thread Naresh P R (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-27114?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Naresh P R updated HIVE-27114:
--
Description: 
HMS API calls are throwing following exception because of thrift upgrade
{code:java}
org.apache.thrift.transport.TTransportException: MaxMessageSize reached
        at 
org.apache.thrift.transport.TEndpointTransport.countConsumedMessageBytes(TEndpointTransport.java:96)
 
        at 
org.apache.thrift.transport.TMemoryInputTransport.read(TMemoryInputTransport.java:97)
 
        at 
org.apache.thrift.transport.TSaslTransport.read(TSaslTransport.java:390) 
        at 
org.apache.thrift.transport.TSaslClientTransport.read(TSaslClientTransport.java:39)
 
        at org.apache.thrift.transport.TTransport.readAll(TTransport.java:109) 
        at 
org.apache.hadoop.hive.metastore.security.TFilterTransport.readAll(TFilterTransport.java:63)
 
        at 
org.apache.thrift.protocol.TBinaryProtocol.readStringBody(TBinaryProtocol.java:417)
 
        at 
org.apache.thrift.protocol.TBinaryProtocol.readString(TBinaryProtocol.java:411) 
        at 
org.apache.hadoop.hive.metastore.api.Partition$PartitionStandardScheme.read(Partition.java:1286)
 
        at 
org.apache.hadoop.hive.metastore.api.Partition$PartitionStandardScheme.read(Partition.java:1205)
 
        at 
org.apache.hadoop.hive.metastore.api.Partition.read(Partition.java:1062) 
        at 
org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$get_partitions_result$get_partitions_resultStandardScheme.read(ThriftHiveMetastore.java)
 
        at 
org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$get_partitions_result$get_partitions_resultStandardScheme.read(ThriftHiveMetastore.java)
 
        at 
org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$get_partitions_result.read(ThriftHiveMetastore.java)
 
        at org.apache.thrift.TServiceClient.receiveBase(TServiceClient.java:88) 
        at 
org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$Client.recv_get_partitions(ThriftHiveMetastore.java:3290)
 
        at 
org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$Client.get_partitions(ThriftHiveMetastore.java:3275)
 
        at 
org.apache.hadoop.hive.metastore.HiveMetaStoreClient.listPartitions(HiveMetaStoreClient.java:1782)
 
        at 
org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient.listPartitions(SessionHiveMetaStoreClient.java:1134)
 
        at 
org.apache.hadoop.hive.metastore.HiveMetaStoreClient.listPartitions(HiveMetaStoreClient.java:1775)
 
        at sun.reflect.GeneratedMethodAccessor169.invoke(Unknown Source) ~[?:?]
        at 
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
 ~[?:1.8.0_311]
        at java.lang.reflect.Method.invoke(Method.java:498) ~[?:1.8.0_311]
        at 
org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.invoke(RetryingMetaStoreClient.java:213)
 
        at com.sun.proxy.$Proxy52.listPartitions(Unknown Source) ~[?:?]
        at sun.reflect.GeneratedMethodAccessor169.invoke(Unknown Source) ~[?:?]
        at 
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
 ~[?:1.8.0_311]
        at java.lang.reflect.Method.invoke(Method.java:498) ~[?:1.8.0_311]
        at 
org.apache.hadoop.hive.metastore.HiveMetaStoreClient$SynchronizedHandler.invoke(HiveMetaStoreClient.java:3550)
 
        at com.sun.proxy.$Proxy52.listPartitions(Unknown Source) ~[?:?]
        at 
org.apache.hadoop.hive.ql.metadata.Hive.getAllPartitionsOf(Hive.java:3793) 
        at 
org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner.getAllPartitions(PartitionPruner.java:485)
   {code}
Large size partition metadata is causing this issue

e.g., Impala stores huge stats chunks in partitionMetadata with {*}param_keys = 
(impala_intermediate_stats_chunk*){*}; these PARTITION_PARAM_KEYS are not 
required for Hive. These params should be skipped while preparing partition 
object from HMS to HS2.

Similar to HIVE-25501, any user defined regex param_keys should be skipped in 
listPartitions HMS API call response.

  was:
HMS API calls are throwing following exception because of thrift upgrade
{code:java}
org.apache.thrift.transport.TTransportException: MaxMessageSize reached
        at 
org.apache.thrift.transport.TEndpointTransport.countConsumedMessageBytes(TEndpointTransport.java:96)
 
        at 
org.apache.thrift.transport.TMemoryInputTransport.read(TMemoryInputTransport.java:97)
 
        at 
org.apache.thrift.transport.TSaslTransport.read(TSaslTransport.java:390) 
        at 
org.apache.thrift.transport.TSaslClientTransport.read(TSaslClientTransport.java:39)
 
        at org.apache.thrift.transport.TTransport.readAll(TTransport.java:109) 
        at 
org.apache.hadoop.hive.metastore.security.TFilterTransport.readAll(TFilterTransport.java:63)
 
        at 
org.apache.thrift.protocol.TBinaryProtocol.readStringBody(TBinaryProtocol.java:417)
 
        

[jira] [Updated] (HIVE-27114) Provide a configurable filter for removing useless properties in Partition objects from getPartitions HMS Calls

2023-02-28 Thread Naresh P R (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-27114?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Naresh P R updated HIVE-27114:
--
Description: 
HMS API calls are throwing following exception because of thrift upgrade
{code:java}
org.apache.thrift.transport.TTransportException: MaxMessageSize reached
        at 
org.apache.thrift.transport.TEndpointTransport.countConsumedMessageBytes(TEndpointTransport.java:96)
 
        at 
org.apache.thrift.transport.TMemoryInputTransport.read(TMemoryInputTransport.java:97)
 
        at 
org.apache.thrift.transport.TSaslTransport.read(TSaslTransport.java:390) 
        at 
org.apache.thrift.transport.TSaslClientTransport.read(TSaslClientTransport.java:39)
 
        at org.apache.thrift.transport.TTransport.readAll(TTransport.java:109) 
        at 
org.apache.hadoop.hive.metastore.security.TFilterTransport.readAll(TFilterTransport.java:63)
 
        at 
org.apache.thrift.protocol.TBinaryProtocol.readStringBody(TBinaryProtocol.java:417)
 
        at 
org.apache.thrift.protocol.TBinaryProtocol.readString(TBinaryProtocol.java:411) 
        at 
org.apache.hadoop.hive.metastore.api.Partition$PartitionStandardScheme.read(Partition.java:1286)
 
        at 
org.apache.hadoop.hive.metastore.api.Partition$PartitionStandardScheme.read(Partition.java:1205)
 
        at 
org.apache.hadoop.hive.metastore.api.Partition.read(Partition.java:1062) 
        at 
org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$get_partitions_result$get_partitions_resultStandardScheme.read(ThriftHiveMetastore.java)
 
        at 
org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$get_partitions_result$get_partitions_resultStandardScheme.read(ThriftHiveMetastore.java)
 
        at 
org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$get_partitions_result.read(ThriftHiveMetastore.java)
 
        at org.apache.thrift.TServiceClient.receiveBase(TServiceClient.java:88) 
        at 
org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$Client.recv_get_partitions(ThriftHiveMetastore.java:3290)
 
        at 
org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$Client.get_partitions(ThriftHiveMetastore.java:3275)
  {code}
Large size partition metadata is causing this issue

e.g., Impala stores huge stats chunks in partitionMetadata with {*}param_keys = 
(impala_intermediate_stats_chunk*){*}; these PARTITION_PARAM_KEYS are not 
required for Hive. These params should be skipped while preparing partition 
object from HMS to HS2.

Similar to HIVE-25501, any user defined regex param_keys should be skipped in 
getPartitions HMS API call response.

  was:
HMS API calls are throwing following exception because of thrift upgrade

 
{code:java}
org.apache.thrift.transport.TTransportException: MaxMessageSize reached
        at 
org.apache.thrift.transport.TEndpointTransport.countConsumedMessageBytes(TEndpointTransport.java:96)
 
        at 
org.apache.thrift.transport.TMemoryInputTransport.read(TMemoryInputTransport.java:97)
 
        at 
org.apache.thrift.transport.TSaslTransport.read(TSaslTransport.java:390) 
        at 
org.apache.thrift.transport.TSaslClientTransport.read(TSaslClientTransport.java:39)
 
        at org.apache.thrift.transport.TTransport.readAll(TTransport.java:109) 
        at 
org.apache.hadoop.hive.metastore.security.TFilterTransport.readAll(TFilterTransport.java:63)
 
        at 
org.apache.thrift.protocol.TBinaryProtocol.readStringBody(TBinaryProtocol.java:417)
 
        at 
org.apache.thrift.protocol.TBinaryProtocol.readString(TBinaryProtocol.java:411) 
        at 
org.apache.hadoop.hive.metastore.api.Partition$PartitionStandardScheme.read(Partition.java:1286)
 
        at 
org.apache.hadoop.hive.metastore.api.Partition$PartitionStandardScheme.read(Partition.java:1205)
 
        at 
org.apache.hadoop.hive.metastore.api.Partition.read(Partition.java:1062) 
        at 
org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$get_partitions_result$get_partitions_resultStandardScheme.read(ThriftHiveMetastore.java)
 
        at 
org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$get_partitions_result$get_partitions_resultStandardScheme.read(ThriftHiveMetastore.java)
 
        at 
org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$get_partitions_result.read(ThriftHiveMetastore.java)
 
        at org.apache.thrift.TServiceClient.receiveBase(TServiceClient.java:88) 
        at 
org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$Client.recv_get_partitions(ThriftHiveMetastore.java:3290)
 
        at 
org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$Client.get_partitions(ThriftHiveMetastore.java:3275)
  {code}
 

 

Large size partition metadata is causing this issue

eg., impala stores huge stats chunk in partitionMetadata with {*}param_keys = 
(impala_intermediate_stats_chunk*){*}, these PARTITION_PARAM is not required 
for Hive. These params should be skipped while preparing partition object from 
HMS 

[jira] [Updated] (HIVE-27114) Provide a configurable filter for removing useless properties in Partition objects from getPartitions HMS Calls

2023-02-28 Thread Naresh P R (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-27114?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Naresh P R updated HIVE-27114:
--
Summary: Provide a configurable filter for removing useless properties in 
Partition objects from getPartitions HMS Calls  (was: Provide a configurable 
filter for removing useless properties from PartitionDesc objects from 
getPartitions HMS Calls)

> Provide a configurable filter for removing useless properties in Partition 
> objects from getPartitions HMS Calls
> ---
>
> Key: HIVE-27114
> URL: https://issues.apache.org/jira/browse/HIVE-27114
> Project: Hive
>  Issue Type: Bug
>Reporter: Naresh P R
>Priority: Major
>
> HMS API calls are throwing following exception because of thrift upgrade
>  
> {code:java}
> org.apache.thrift.transport.TTransportException: MaxMessageSize reached
>         at 
> org.apache.thrift.transport.TEndpointTransport.countConsumedMessageBytes(TEndpointTransport.java:96)
>  
>         at 
> org.apache.thrift.transport.TMemoryInputTransport.read(TMemoryInputTransport.java:97)
>  
>         at 
> org.apache.thrift.transport.TSaslTransport.read(TSaslTransport.java:390) 
>         at 
> org.apache.thrift.transport.TSaslClientTransport.read(TSaslClientTransport.java:39)
>  
>         at 
> org.apache.thrift.transport.TTransport.readAll(TTransport.java:109) 
>         at 
> org.apache.hadoop.hive.metastore.security.TFilterTransport.readAll(TFilterTransport.java:63)
>  
>         at 
> org.apache.thrift.protocol.TBinaryProtocol.readStringBody(TBinaryProtocol.java:417)
>  
>         at 
> org.apache.thrift.protocol.TBinaryProtocol.readString(TBinaryProtocol.java:411)
>  
>         at 
> org.apache.hadoop.hive.metastore.api.Partition$PartitionStandardScheme.read(Partition.java:1286)
>  
>         at 
> org.apache.hadoop.hive.metastore.api.Partition$PartitionStandardScheme.read(Partition.java:1205)
>  
>         at 
> org.apache.hadoop.hive.metastore.api.Partition.read(Partition.java:1062) 
>         at 
> org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$get_partitions_result$get_partitions_resultStandardScheme.read(ThriftHiveMetastore.java)
>  
>         at 
> org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$get_partitions_result$get_partitions_resultStandardScheme.read(ThriftHiveMetastore.java)
>  
>         at 
> org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$get_partitions_result.read(ThriftHiveMetastore.java)
>  
>         at 
> org.apache.thrift.TServiceClient.receiveBase(TServiceClient.java:88) 
>         at 
> org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$Client.recv_get_partitions(ThriftHiveMetastore.java:3290)
>  
>         at 
> org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$Client.get_partitions(ThriftHiveMetastore.java:3275)
>   {code}
>  
>  
> Large size partition metadata is causing this issue
> e.g., Impala stores huge stats chunks in partitionMetadata with {*}param_keys = 
> (impala_intermediate_stats_chunk*){*}; these PARTITION_PARAM_KEYS are not required 
> for Hive. These params should be skipped while preparing partition object 
> from HMS to HS2.
> Similarly any user defined regex should be skipped in getPartitions HMS API 
> call. Similar to HIVE-25501
>  



--
This message was sent by Atlassian Jira
(v8.20.10#820010)


[jira] [Commented] (HIVE-26495) MSCK repair perf issue HMSChecker ThreadPool is blocked at fs.listStatus

2022-09-26 Thread Naresh P R (Jira)


[ 
https://issues.apache.org/jira/browse/HIVE-26495?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=17609601#comment-17609601
 ] 

Naresh P R commented on HIVE-26495:
---

Thank you for the review & merge [~srahman] [~ayushtkn] 

> MSCK repair perf issue HMSChecker ThreadPool is blocked at fs.listStatus
> 
>
> Key: HIVE-26495
> URL: https://issues.apache.org/jira/browse/HIVE-26495
> Project: Hive
>  Issue Type: Bug
>Reporter: Naresh P R
>Assignee: Naresh P R
>Priority: Major
>  Labels: pull-request-available
> Fix For: 4.0.0-alpha-2
>
>  Time Spent: 2h
>  Remaining Estimate: 0h
>
> With hive.metastore.fshandler.threads = 15, all 15 *MSCK-GetPaths-xx* threads 
> are blocked at the following trace.
> {code:java}
> "MSCK-GetPaths-11" #12345 daemon prio=5 os_prio=0 tid= nid= waiting on 
> condition [0x7f9f099a6000]
>    java.lang.Thread.State: WAITING (parking)
>     at sun.misc.Unsafe.park(Native Method)
>     - parking to wait for  <0x0003f92d1668> (a 
> java.util.concurrent.CompletableFuture$Signaller)
>     at java.util.concurrent.locks.LockSupport.park(LockSupport.java:175)
>     at 
> java.util.concurrent.CompletableFuture$Signaller.block(CompletableFuture.java:1707)
>     at java.util.concurrent.ForkJoinPool.managedBlock(ForkJoinPool.java:3323)
> ...
> at org.apache.hadoop.fs.s3a.S3AFileSystem.listStatus(S3AFileSystem.java:3230)
>     at org.apache.hadoop.fs.FileSystem.listStatus(FileSystem.java:1953)
>     at org.apache.hadoop.fs.FileSystem.listStatus(FileSystem.java:1995)
>     at 
> org.apache.hadoop.hive.metastore.HiveMetaStoreChecker$PathDepthInfoCallable.processPathDepthInfo(HiveMetaStoreChecker.java:550)
>     at 
> org.apache.hadoop.hive.metastore.HiveMetaStoreChecker$PathDepthInfoCallable.call(HiveMetaStoreChecker.java:543)
>     at 
> org.apache.hadoop.hive.metastore.HiveMetaStoreChecker$PathDepthInfoCallable.call(HiveMetaStoreChecker.java:525)
>     at java.util.concurrent.FutureTask.run(FutureTask.java:266)
>     at 
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
>     at 
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
>     at java.lang.Thread.run(Thread.java:750){code}
> We should take advantage of the non-blocking listStatusIterator instead of 
> listStatus, which is a blocking call.



--
This message was sent by Atlassian Jira
(v8.20.10#820010)


[jira] [Updated] (HIVE-26526) MSCK sync is not removing partitions with special characters

2022-09-08 Thread Naresh P R (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-26526?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Naresh P R updated HIVE-26526:
--
Attachment: test.q

> MSCK sync is not removing partitions with special characters
> 
>
> Key: HIVE-26526
> URL: https://issues.apache.org/jira/browse/HIVE-26526
> Project: Hive
>  Issue Type: Bug
>Reporter: Naresh P R
>Priority: Major
> Attachments: test.q
>
>
> The PARTITIONS table has the URL-encoded string, while PARTITION_KEY_VALS has 
> the original string.
> {code:java}
> hive=> select * from "PARTITION_KEY_VALS" where "PART_ID" IN (46753, 46754, 
> 46755, 46756);
>  PART_ID |    PART_KEY_VAL     | INTEGER_IDX
> -+-+-
>    46753 | 2022-02-*           |           0
>    46754 | 2011-03-01          |           0
>    46755 | 2022-01-*           |           0
>    46756 | 2010-01-01          |           0
>    
>    
> hive=> select * from "PARTITIONS" where "TBL_ID" = 23567 ;
>  PART_ID | CREATE_TIME | LAST_ACCESS_TIME |       PART_NAME       | SD_ID | 
> TBL_ID | WRITE_ID
> -+-+--+---+---++--
>    46753 |           0 |                0 | part_date=2022-02-%2A | 70195 |  
> 23567 |        0
>    46754 |           0 |                0 | part_date=2011-03-01  | 70196 |  
> 23567 |        0
>    46755 |           0 |                0 | part_date=2022-01-%2A | 70197 |  
> 23567 |        0
>    46756 |           0 |                0 | part_date=2010-01-01  | 70198 |  
> 23567 |        0
> (4 rows){code}
>  
> 1) DirectSQL has a join condition on PARTITION_KEY_VALS.PART_KEY_VAL = 
> "2022-02-%2A" here:
> https://github.com/apache/hive/blob/master/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java#L883
> 2) JDO has a filter condition on PARTITIONS.PART_NAME = 
> "part_date=2022-02-%252A" (i.e., URL-encoded twice):
> once in HS2
> https://github.com/apache/hive/blob/master/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreChecker.java#L353
> and a second time in HMS
> [https://github.com/apache/hive/blob/master/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/parser/ExpressionTree.java#L365]
> Both conditions return 0 partitions, so those partitions are not removed from 
> HMS metadata.
>  
> Attaching repro q file 



--
This message was sent by Atlassian Jira
(v8.20.10#820010)


[jira] [Updated] (HIVE-26526) MSCK sync is not removing partitions with special characters

2022-09-08 Thread Naresh P R (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-26526?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Naresh P R updated HIVE-26526:
--
Issue Type: Bug  (was: New Feature)

> MSCK sync is not removing partitions with special characters
> 
>
> Key: HIVE-26526
> URL: https://issues.apache.org/jira/browse/HIVE-26526
> Project: Hive
>  Issue Type: Bug
>Reporter: Naresh P R
>Priority: Major
>
> The PARTITIONS table has the URL-encoded string, whereas PARTITION_KEY_VALS has 
> the original string.
> {code:java}
> hive=> select * from "PARTITION_KEY_VALS" where "PART_ID" IN (46753, 46754, 
> 46755, 46756);
>  PART_ID |    PART_KEY_VAL     | INTEGER_IDX
> -+-+-
>    46753 | 2022-02-*           |           0
>    46754 | 2011-03-01          |           0
>    46755 | 2022-01-*           |           0
>    46756 | 2010-01-01          |           0
>    
>    
> hive=> select * from "PARTITIONS" where "TBL_ID" = 23567 ;
>  PART_ID | CREATE_TIME | LAST_ACCESS_TIME |       PART_NAME       | SD_ID | 
> TBL_ID | WRITE_ID
> -+-+--+---+---++--
>    46753 |           0 |                0 | part_date=2022-02-%2A | 70195 |  
> 23567 |        0
>    46754 |           0 |                0 | part_date=2011-03-01  | 70196 |  
> 23567 |        0
>    46755 |           0 |                0 | part_date=2022-01-%2A | 70197 |  
> 23567 |        0
>    46756 |           0 |                0 | part_date=2010-01-01  | 70198 |  
> 23567 |        0
> (4 rows){code}
>  
> 1) DirectSQL has a join condition on PARTITION_KEY_VALS.PART_KEY_VAL = 
> "2022-02-%2A" here:
> https://github.com/apache/hive/blob/master/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java#L883
> 2) JDO has a filter condition on PARTITIONS.PART_NAME = 
> "part_date=2022-02-%252A" (i.e., URL-encoded twice):
> once in HS2
> https://github.com/apache/hive/blob/master/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreChecker.java#L353
> and a second time in HMS
> [https://github.com/apache/hive/blob/master/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/parser/ExpressionTree.java#L365]
> Both conditions return 0 partitions, so those partitions are not removed from 
> HMS metadata.
>  
> Attaching repro q file 



--
This message was sent by Atlassian Jira
(v8.20.10#820010)


[jira] [Commented] (HIVE-26495) MSCK repair perf issue HMSChecker ThreadPool is blocked at fs.listStatus

2022-08-26 Thread Naresh P R (Jira)


[ 
https://issues.apache.org/jira/browse/HIVE-26495?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=17585622#comment-17585622
 ] 

Naresh P R commented on HIVE-26495:
---

[~srahman] I didn't see much improvement. I was missing HIVE-21040, which helped 
to improve performance for S3 tables with millions of files under the partition path.

However, I would still like to pursue the patch. Let me know if you can review it.

> MSCK repair perf issue HMSChecker ThreadPool is blocked at fs.listStatus
> 
>
> Key: HIVE-26495
> URL: https://issues.apache.org/jira/browse/HIVE-26495
> Project: Hive
>  Issue Type: Bug
>Reporter: Naresh P R
>Assignee: Naresh P R
>Priority: Major
>  Labels: pull-request-available
>  Time Spent: 1h 20m
>  Remaining Estimate: 0h
>
> With hive.metastore.fshandler.threads = 15, all 15 *MSCK-GetPaths-xx* threads 
> are blocked at the following stack trace.
> {code:java}
> "MSCK-GetPaths-11" #12345 daemon prio=5 os_prio=0 tid= nid= waiting on 
> condition [0x7f9f099a6000]
>    java.lang.Thread.State: WAITING (parking)
>     at sun.misc.Unsafe.park(Native Method)
>     - parking to wait for  <0x0003f92d1668> (a 
> java.util.concurrent.CompletableFuture$Signaller)
>     at java.util.concurrent.locks.LockSupport.park(LockSupport.java:175)
>     at 
> java.util.concurrent.CompletableFuture$Signaller.block(CompletableFuture.java:1707)
>     at java.util.concurrent.ForkJoinPool.managedBlock(ForkJoinPool.java:3323)
> ...
> at org.apache.hadoop.fs.s3a.S3AFileSystem.listStatus(S3AFileSystem.java:3230)
>     at org.apache.hadoop.fs.FileSystem.listStatus(FileSystem.java:1953)
>     at org.apache.hadoop.fs.FileSystem.listStatus(FileSystem.java:1995)
>     at 
> org.apache.hadoop.hive.metastore.HiveMetaStoreChecker$PathDepthInfoCallable.processPathDepthInfo(HiveMetaStoreChecker.java:550)
>     at 
> org.apache.hadoop.hive.metastore.HiveMetaStoreChecker$PathDepthInfoCallable.call(HiveMetaStoreChecker.java:543)
>     at 
> org.apache.hadoop.hive.metastore.HiveMetaStoreChecker$PathDepthInfoCallable.call(HiveMetaStoreChecker.java:525)
>     at java.util.concurrent.FutureTask.run(FutureTask.java:266)
>     at 
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
>     at 
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
>     at java.lang.Thread.run(Thread.java:750){code}
> We should take advantage of the non-blocking listStatusIterator instead of 
> listStatus, which is a blocking call.



--
This message was sent by Atlassian Jira
(v8.20.10#820010)


[jira] [Comment Edited] (HIVE-26495) MSCK repair perf issue HMSChecker ThreadPool is blocked at fs.listStatus

2022-08-26 Thread Naresh P R (Jira)


[ 
https://issues.apache.org/jira/browse/HIVE-26495?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=17585622#comment-17585622
 ] 

Naresh P R edited comment on HIVE-26495 at 8/26/22 9:52 PM:


[~srahman] I didn't see much improvement. I was missing HIVE-21040 in my internal 
branch, which helped to improve performance for S3 tables with millions of files 
under the partition path.

However, I would still like to pursue the patch. Let me know if you can review it.


was (Author: nareshpr):
[~srahman] I dint see much improvement. I was missing HIVE-21040 which helped 
to improve performance in S3 table with millions of files under partition path.

But still i wanted to pursue with the patch. Let me know if you can review it.

> MSCK repair perf issue HMSChecker ThreadPool is blocked at fs.listStatus
> 
>
> Key: HIVE-26495
> URL: https://issues.apache.org/jira/browse/HIVE-26495
> Project: Hive
>  Issue Type: Bug
>Reporter: Naresh P R
>Assignee: Naresh P R
>Priority: Major
>  Labels: pull-request-available
>  Time Spent: 1h 20m
>  Remaining Estimate: 0h
>
> With hive.metastore.fshandler.threads = 15, all 15 *MSCK-GetPaths-xx* threads 
> are blocked at the following stack trace.
> {code:java}
> "MSCK-GetPaths-11" #12345 daemon prio=5 os_prio=0 tid= nid= waiting on 
> condition [0x7f9f099a6000]
>    java.lang.Thread.State: WAITING (parking)
>     at sun.misc.Unsafe.park(Native Method)
>     - parking to wait for  <0x0003f92d1668> (a 
> java.util.concurrent.CompletableFuture$Signaller)
>     at java.util.concurrent.locks.LockSupport.park(LockSupport.java:175)
>     at 
> java.util.concurrent.CompletableFuture$Signaller.block(CompletableFuture.java:1707)
>     at java.util.concurrent.ForkJoinPool.managedBlock(ForkJoinPool.java:3323)
> ...
> at org.apache.hadoop.fs.s3a.S3AFileSystem.listStatus(S3AFileSystem.java:3230)
>     at org.apache.hadoop.fs.FileSystem.listStatus(FileSystem.java:1953)
>     at org.apache.hadoop.fs.FileSystem.listStatus(FileSystem.java:1995)
>     at 
> org.apache.hadoop.hive.metastore.HiveMetaStoreChecker$PathDepthInfoCallable.processPathDepthInfo(HiveMetaStoreChecker.java:550)
>     at 
> org.apache.hadoop.hive.metastore.HiveMetaStoreChecker$PathDepthInfoCallable.call(HiveMetaStoreChecker.java:543)
>     at 
> org.apache.hadoop.hive.metastore.HiveMetaStoreChecker$PathDepthInfoCallable.call(HiveMetaStoreChecker.java:525)
>     at java.util.concurrent.FutureTask.run(FutureTask.java:266)
>     at 
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
>     at 
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
>     at java.lang.Thread.run(Thread.java:750){code}
> We should take advantage of the non-blocking listStatusIterator instead of 
> listStatus, which is a blocking call.



--
This message was sent by Atlassian Jira
(v8.20.10#820010)


[jira] [Updated] (HIVE-26495) MSCK repair perf issue HMSChecker ThreadPool is blocked at fs.listStatus

2022-08-24 Thread Naresh P R (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-26495?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Naresh P R updated HIVE-26495:
--
Issue Type: Bug  (was: New Feature)

> MSCK repair perf issue HMSChecker ThreadPool is blocked at fs.listStatus
> 
>
> Key: HIVE-26495
> URL: https://issues.apache.org/jira/browse/HIVE-26495
> Project: Hive
>  Issue Type: Bug
>Reporter: Naresh P R
>Assignee: Naresh P R
>Priority: Major
>
> With hive.metastore.fshandler.threads = 15, all 15 *MSCK-GetPaths-xx* threads 
> are blocked at the following stack trace.
> {code:java}
> "MSCK-GetPaths-11" #12345 daemon prio=5 os_prio=0 tid= nid= waiting on 
> condition [0x7f9f099a6000]
>    java.lang.Thread.State: WAITING (parking)
>     at sun.misc.Unsafe.park(Native Method)
>     - parking to wait for  <0x0003f92d1668> (a 
> java.util.concurrent.CompletableFuture$Signaller)
>     at java.util.concurrent.locks.LockSupport.park(LockSupport.java:175)
>     at 
> java.util.concurrent.CompletableFuture$Signaller.block(CompletableFuture.java:1707)
>     at java.util.concurrent.ForkJoinPool.managedBlock(ForkJoinPool.java:3323)
> ...
> at org.apache.hadoop.fs.s3a.S3AFileSystem.listStatus(S3AFileSystem.java:3230)
>     at org.apache.hadoop.fs.FileSystem.listStatus(FileSystem.java:1953)
>     at org.apache.hadoop.fs.FileSystem.listStatus(FileSystem.java:1995)
>     at 
> org.apache.hadoop.hive.metastore.HiveMetaStoreChecker$PathDepthInfoCallable.processPathDepthInfo(HiveMetaStoreChecker.java:550)
>     at 
> org.apache.hadoop.hive.metastore.HiveMetaStoreChecker$PathDepthInfoCallable.call(HiveMetaStoreChecker.java:543)
>     at 
> org.apache.hadoop.hive.metastore.HiveMetaStoreChecker$PathDepthInfoCallable.call(HiveMetaStoreChecker.java:525)
>     at java.util.concurrent.FutureTask.run(FutureTask.java:266)
>     at 
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
>     at 
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
>     at java.lang.Thread.run(Thread.java:750){code}
> We should take advantage of the non-blocking listStatusIterator instead of 
> listStatus, which is a blocking call.



--
This message was sent by Atlassian Jira
(v8.20.10#820010)


[jira] [Assigned] (HIVE-26495) MSCK repair perf issue HMSChecker ThreadPool is blocked at fs.listStatus

2022-08-24 Thread Naresh P R (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-26495?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Naresh P R reassigned HIVE-26495:
-


> MSCK repair perf issue HMSChecker ThreadPool is blocked at fs.listStatus
> 
>
> Key: HIVE-26495
> URL: https://issues.apache.org/jira/browse/HIVE-26495
> Project: Hive
>  Issue Type: New Feature
>Reporter: Naresh P R
>Assignee: Naresh P R
>Priority: Major
>
> With hive.metastore.fshandler.threads = 15, all 15 *MSCK-GetPaths-xx* threads 
> are blocked at the following stack trace.
> {code:java}
> "MSCK-GetPaths-11" #12345 daemon prio=5 os_prio=0 tid= nid= waiting on 
> condition [0x7f9f099a6000]
>    java.lang.Thread.State: WAITING (parking)
>     at sun.misc.Unsafe.park(Native Method)
>     - parking to wait for  <0x0003f92d1668> (a 
> java.util.concurrent.CompletableFuture$Signaller)
>     at java.util.concurrent.locks.LockSupport.park(LockSupport.java:175)
>     at 
> java.util.concurrent.CompletableFuture$Signaller.block(CompletableFuture.java:1707)
>     at java.util.concurrent.ForkJoinPool.managedBlock(ForkJoinPool.java:3323)
> ...
> at org.apache.hadoop.fs.s3a.S3AFileSystem.listStatus(S3AFileSystem.java:3230)
>     at org.apache.hadoop.fs.FileSystem.listStatus(FileSystem.java:1953)
>     at org.apache.hadoop.fs.FileSystem.listStatus(FileSystem.java:1995)
>     at 
> org.apache.hadoop.hive.metastore.HiveMetaStoreChecker$PathDepthInfoCallable.processPathDepthInfo(HiveMetaStoreChecker.java:550)
>     at 
> org.apache.hadoop.hive.metastore.HiveMetaStoreChecker$PathDepthInfoCallable.call(HiveMetaStoreChecker.java:543)
>     at 
> org.apache.hadoop.hive.metastore.HiveMetaStoreChecker$PathDepthInfoCallable.call(HiveMetaStoreChecker.java:525)
>     at java.util.concurrent.FutureTask.run(FutureTask.java:266)
>     at 
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
>     at 
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
>     at java.lang.Thread.run(Thread.java:750){code}
> We should take advantage of the non-blocking listStatusIterator instead of 
> listStatus, which is a blocking call.



--
This message was sent by Atlassian Jira
(v8.20.10#820010)


[jira] [Comment Edited] (HIVE-26096) Select on single column MultiDelimitSerDe table throws AIOBE

2022-04-10 Thread Naresh P R (Jira)


[ 
https://issues.apache.org/jira/browse/HIVE-26096?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=17520096#comment-17520096
 ] 

Naresh P R edited comment on HIVE-26096 at 4/10/22 6:42 AM:


Thanks for the review & commit [~rameshkumar] 


was (Author: nareshpr):
Thanks for the review & commit [~rameshkumar] **

> Select on single column MultiDelimitSerDe table throws AIOBE
> 
>
> Key: HIVE-26096
> URL: https://issues.apache.org/jira/browse/HIVE-26096
> Project: Hive
>  Issue Type: Bug
>Reporter: Naresh P R
>Assignee: Naresh P R
>Priority: Major
>  Labels: pull-request-available
> Fix For: 4.0.0-alpha-2
>
>  Time Spent: 0.5h
>  Remaining Estimate: 0h
>
> Repro details
>  
> {code:java}
> create table test_multidelim(col string)
> ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.MultiDelimitSerDe'
> with serdeproperties('field.delim'='!^') STORED AS TEXTFILE;
> insert into test_multidelim values('aa'),('bb'),('cc'),('dd');
> select * from test_multidelim;
> {code}
> Exception:
> {code:java}
> Caused by: java.lang.ArrayIndexOutOfBoundsException: 0
>         at 
> org.apache.hadoop.hive.serde2.lazy.LazyStruct.parseMultiDelimit(LazyStruct.java:303)
>         at 
> org.apache.hadoop.hive.serde2.MultiDelimitSerDe.doDeserialize(MultiDelimitSerDe.java:160)
>         at 
> org.apache.hadoop.hive.serde2.AbstractEncodingAwareSerDe.deserialize(AbstractEncodingAwareSerDe.java:74)
>         at 
> org.apache.hadoop.hive.ql.exec.FetchOperator.getNextRow(FetchOperator.java:603){code}
>  



--
This message was sent by Atlassian Jira
(v8.20.1#820001)


[jira] [Commented] (HIVE-26096) Select on single column MultiDelimitSerDe table throws AIOBE

2022-04-10 Thread Naresh P R (Jira)


[ 
https://issues.apache.org/jira/browse/HIVE-26096?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=17520096#comment-17520096
 ] 

Naresh P R commented on HIVE-26096:
---

Thanks for the review & commit [~rameshkumar]

> Select on single column MultiDelimitSerDe table throws AIOBE
> 
>
> Key: HIVE-26096
> URL: https://issues.apache.org/jira/browse/HIVE-26096
> Project: Hive
>  Issue Type: Bug
>Reporter: Naresh P R
>Assignee: Naresh P R
>Priority: Major
>  Labels: pull-request-available
>  Time Spent: 0.5h
>  Remaining Estimate: 0h
>
> Repro details
>  
> {code:java}
> create table test_multidelim(col string)
> ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.MultiDelimitSerDe'
> with serdeproperties('field.delim'='!^') STORED AS TEXTFILE;
> insert into test_multidelim values('aa'),('bb'),('cc'),('dd');
> select * from test_multidelim;
> {code}
> Exception:
> {code:java}
> Caused by: java.lang.ArrayIndexOutOfBoundsException: 0
>         at 
> org.apache.hadoop.hive.serde2.lazy.LazyStruct.parseMultiDelimit(LazyStruct.java:303)
>         at 
> org.apache.hadoop.hive.serde2.MultiDelimitSerDe.doDeserialize(MultiDelimitSerDe.java:160)
>         at 
> org.apache.hadoop.hive.serde2.AbstractEncodingAwareSerDe.deserialize(AbstractEncodingAwareSerDe.java:74)
>         at 
> org.apache.hadoop.hive.ql.exec.FetchOperator.getNextRow(FetchOperator.java:603){code}
>  



--
This message was sent by Atlassian Jira
(v8.20.1#820001)


[jira] [Resolved] (HIVE-26096) Select on single column MultiDelimitSerDe table throws AIOBE

2022-04-10 Thread Naresh P R (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-26096?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Naresh P R resolved HIVE-26096.
---
Fix Version/s: 4.0.0-alpha-2
   Resolution: Fixed

> Select on single column MultiDelimitSerDe table throws AIOBE
> 
>
> Key: HIVE-26096
> URL: https://issues.apache.org/jira/browse/HIVE-26096
> Project: Hive
>  Issue Type: Bug
>Reporter: Naresh P R
>Assignee: Naresh P R
>Priority: Major
>  Labels: pull-request-available
> Fix For: 4.0.0-alpha-2
>
>  Time Spent: 0.5h
>  Remaining Estimate: 0h
>
> Repro details
>  
> {code:java}
> create table test_multidelim(col string)
> ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.MultiDelimitSerDe'
> with serdeproperties('field.delim'='!^') STORED AS TEXTFILE;
> insert into test_multidelim values('aa'),('bb'),('cc'),('dd');
> select * from test_multidelim;
> {code}
> Exception:
> {code:java}
> Caused by: java.lang.ArrayIndexOutOfBoundsException: 0
>         at 
> org.apache.hadoop.hive.serde2.lazy.LazyStruct.parseMultiDelimit(LazyStruct.java:303)
>         at 
> org.apache.hadoop.hive.serde2.MultiDelimitSerDe.doDeserialize(MultiDelimitSerDe.java:160)
>         at 
> org.apache.hadoop.hive.serde2.AbstractEncodingAwareSerDe.deserialize(AbstractEncodingAwareSerDe.java:74)
>         at 
> org.apache.hadoop.hive.ql.exec.FetchOperator.getNextRow(FetchOperator.java:603){code}
>  



--
This message was sent by Atlassian Jira
(v8.20.1#820001)


[jira] [Assigned] (HIVE-26096) Select on single column MultiDelimitSerDe table throws AIOBE

2022-03-30 Thread Naresh P R (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-26096?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Naresh P R reassigned HIVE-26096:
-


> Select on single column MultiDelimitSerDe table throws AIOBE
> 
>
> Key: HIVE-26096
> URL: https://issues.apache.org/jira/browse/HIVE-26096
> Project: Hive
>  Issue Type: Bug
>Reporter: Naresh P R
>Assignee: Naresh P R
>Priority: Major
>
> Repro details
>  
> {code:java}
> create table test_multidelim(col string)
> ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.MultiDelimitSerDe'
> with serdeproperties('field.delim'='!^') STORED AS TEXTFILE;
> insert into test_multidelim values('aa'),('bb'),('cc'),('dd');
> select * from test_multidelim;
> {code}
> Exception:
> {code:java}
> Caused by: java.lang.ArrayIndexOutOfBoundsException: 0
>         at 
> org.apache.hadoop.hive.serde2.lazy.LazyStruct.parseMultiDelimit(LazyStruct.java:303)
>         at 
> org.apache.hadoop.hive.serde2.MultiDelimitSerDe.doDeserialize(MultiDelimitSerDe.java:160)
>         at 
> org.apache.hadoop.hive.serde2.AbstractEncodingAwareSerDe.deserialize(AbstractEncodingAwareSerDe.java:74)
>         at 
> org.apache.hadoop.hive.ql.exec.FetchOperator.getNextRow(FetchOperator.java:603){code}
>  



--
This message was sent by Atlassian Jira
(v8.20.1#820001)


[jira] [Updated] (HIVE-26047) Vectorized LIKE UDF should use Re2J regex to address JDK-8203458

2022-03-17 Thread Naresh P R (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-26047?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Naresh P R updated HIVE-26047:
--
Description: 
The pattern below takes a long time to validate against the regex in Java 8, 
with the same stack trace as shown in the Java bug

[JDK-8203458|https://bugs.java.com/bugdatabase/view_bug.do?bug_id=JDK-8203458]

 
{code:java}
import java.util.regex.Pattern;
public class Test {
  public static void main(String args[]) {
String pattern = 
"a_b";
 
Pattern CHAIN_PATTERN = Pattern.compile("(%?[^%_]+%?)+");
CHAIN_PATTERN.matcher(pattern).matches(); 
  }
}
{code}
Same is reproducible with following SQL
{code:java}
create table table1(name string);
insert into table1 (name) values 
('a_b');
select * from table1 where name like 
"a_b";{code}

  was:
Below pattern is taking a long time to validate regex in java8 with same trace 
as shown in java bug 
[[JDK-8203458||https://bugs.java.com/bugdatabase/view_bug.do?bug_id=JDK-8203458]
 [https://bugs.java.com/bugdatabase/view_bug.do?bug_id=JDK-8203458] 
[]|https://bugs.java.com/bugdatabase/view_bug.do?bug_id=JDK-8203458]
import java.util.regex.Pattern;

public class ABCD {

  public static void main(String args[]) {
String pattern = 
"a_b";
Pattern CHAIN_PATTERN = Pattern.compile("(%?[^%_]+%?)+");
CHAIN_PATTERN.matcher(pattern).matches();
  }
}
Same is reproducible with following SQL
{code:java}
create table table1(name string);
insert into table1 (name) values 
('a_b');
select * from table1 where name like 
"a_b";{code}


> Vectorized LIKE UDF should use Re2J regex to address JDK-8203458
> 
>
> Key: HIVE-26047
> URL: https://issues.apache.org/jira/browse/HIVE-26047
> Project: Hive
>  Issue Type: Bug
>Reporter: Naresh P R
>Assignee: Naresh P R
>Priority: Major
>
> The pattern below takes a long time to validate against the regex in Java 8, 
> with the same stack trace as shown in the Java bug
> [JDK-8203458|https://bugs.java.com/bugdatabase/view_bug.do?bug_id=JDK-8203458]
>  
> {code:java}
> import java.util.regex.Pattern;
> public class Test {
>   public static void main(String args[]) {
> String pattern = 
> "a_b";
>  
> Pattern CHAIN_PATTERN = Pattern.compile("(%?[^%_]+%?)+");
> CHAIN_PATTERN.matcher(pattern).matches(); 
>   }
> }
> {code}
> Same is reproducible with following SQL
> {code:java}
> create table table1(name string);
> insert into table1 (name) values 
> ('a_b');
> select * from table1 where name like 
> "a_b";{code}



--
This message was sent by Atlassian Jira
(v8.20.1#820001)


[jira] [Assigned] (HIVE-26047) Vectorized LIKE UDF should use Re2J regex to address JDK-8203458

2022-03-17 Thread Naresh P R (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-26047?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Naresh P R reassigned HIVE-26047:
-


> Vectorized LIKE UDF should use Re2J regex to address JDK-8203458
> 
>
> Key: HIVE-26047
> URL: https://issues.apache.org/jira/browse/HIVE-26047
> Project: Hive
>  Issue Type: Bug
>Reporter: Naresh P R
>Assignee: Naresh P R
>Priority: Major
>
> Below pattern is taking a long time to validate regex in java8 with same 
> trace as shown in java bug 
> [JDK-8203458|https://bugs.java.com/bugdatabase/view_bug.do?bug_id=JDK-8203458]
> import java.util.regex.Pattern;
> public class ABCD {
>   public static void main(String args[]) {
> String pattern = 
> "a_b";
> Pattern CHAIN_PATTERN = Pattern.compile("(%?[^%_]+%?)+");
> CHAIN_PATTERN.matcher(pattern).matches();
>   }
> }
> Same is reproducible with following SQL
> {code:java}
> create table table1(name string);
> insert into table1 (name) values 
> ('a_b');
> select * from table1 where name like 
> "a_b";{code}



--
This message was sent by Atlassian Jira
(v8.20.1#820001)


[jira] [Updated] (HIVE-26006) TopNKey and PTF with more than one column is failing with IOBE

2022-03-04 Thread Naresh P R (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-26006?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Naresh P R updated HIVE-26006:
--
Description: 
{code:java}
java.lang.IndexOutOfBoundsException: toIndex = 2
at java.util.ArrayList.subListRangeCheck(ArrayList.java:1014)
at java.util.ArrayList.subList(ArrayList.java:1006)
at org.apache.hadoop.hive.ql.plan.TopNKeyDesc.combine(TopNKeyDesc.java:201)
at 
org.apache.hadoop.hive.ql.optimizer.topnkey.TopNKeyPushdownProcessor.pushdownThroughGroupBy(TopNKeyPushdownProcessor.java:162)
at 
org.apache.hadoop.hive.ql.optimizer.topnkey.TopNKeyPushdownProcessor.pushdown(TopNKeyPushdownProcessor.java:76)
at 
org.apache.hadoop.hive.ql.optimizer.topnkey.TopNKeyPushdownProcessor.process(TopNKeyPushdownProcessor.java:57)
at 
org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher.dispatch(DefaultRuleDispatcher.java:90)
at 
org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.dispatchAndReturn(DefaultGraphWalker.java:105)
at 
org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.dispatch(DefaultGraphWalker.java:89)
at 
org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.walk(DefaultGraphWalker.java:158)
at 
org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.startWalking(DefaultGraphWalker.java:120)
at 
org.apache.hadoop.hive.ql.parse.TezCompiler.runTopNKeyOptimization(TezCompiler.java:1305)
at 
org.apache.hadoop.hive.ql.parse.TezCompiler.optimizeOperatorPlan(TezCompiler.java:173)
at org.apache.hadoop.hive.ql.parse.TaskCompiler.compile(TaskCompiler.java:159)
at 
org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.analyzeInternal(SemanticAnalyzer.java:12646)
at 
org.apache.hadoop.hive.ql.parse.CalcitePlanner.analyzeInternal(CalcitePlanner.java:358)
at 
org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.analyze(BaseSemanticAnalyzer.java:283)
at org.apache.hadoop.hive.ql.Compiler.analyze(Compiler.java:219)
at org.apache.hadoop.hive.ql.Compiler.compile(Compiler.java:103)
at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:215){code}

  was:
java.lang.IndexOutOfBoundsException: toIndex = 2
at java.util.ArrayList.subListRangeCheck(ArrayList.java:1014)
at java.util.ArrayList.subList(ArrayList.java:1006)
at 
org.apache.hadoop.hive.ql.plan.TopNKeyDesc.combine(TopNKeyDesc.java:201)
at 
org.apache.hadoop.hive.ql.optimizer.topnkey.TopNKeyPushdownProcessor.pushdownThroughGroupBy(TopNKeyPushdownProcessor.java:162)
at 
org.apache.hadoop.hive.ql.optimizer.topnkey.TopNKeyPushdownProcessor.pushdown(TopNKeyPushdownProcessor.java:76)
at 
org.apache.hadoop.hive.ql.optimizer.topnkey.TopNKeyPushdownProcessor.process(TopNKeyPushdownProcessor.java:57)
at 
org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher.dispatch(DefaultRuleDispatcher.java:90)
at 
org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.dispatchAndReturn(DefaultGraphWalker.java:105)
at 
org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.dispatch(DefaultGraphWalker.java:89)
at 
org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.walk(DefaultGraphWalker.java:158)
at 
org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.startWalking(DefaultGraphWalker.java:120)
at 
org.apache.hadoop.hive.ql.parse.TezCompiler.runTopNKeyOptimization(TezCompiler.java:1305)
at 
org.apache.hadoop.hive.ql.parse.TezCompiler.optimizeOperatorPlan(TezCompiler.java:173)
at 
org.apache.hadoop.hive.ql.parse.TaskCompiler.compile(TaskCompiler.java:159)
at 
org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.analyzeInternal(SemanticAnalyzer.java:12646)
at 
org.apache.hadoop.hive.ql.parse.CalcitePlanner.analyzeInternal(CalcitePlanner.java:358)
at 
org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.analyze(BaseSemanticAnalyzer.java:283)
at org.apache.hadoop.hive.ql.Compiler.analyze(Compiler.java:219)
at org.apache.hadoop.hive.ql.Compiler.compile(Compiler.java:103)
at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:215)


> TopNKey and PTF with more than one column is failing with IOBE
> --
>
> Key: HIVE-26006
> URL: https://issues.apache.org/jira/browse/HIVE-26006
> Project: Hive
>  Issue Type: Bug
>Reporter: Naresh P R
>Priority: Major
>
> {code:java}
> java.lang.IndexOutOfBoundsException: toIndex = 2
> at java.util.ArrayList.subListRangeCheck(ArrayList.java:1014)
> at java.util.ArrayList.subList(ArrayList.java:1006)
> at org.apache.hadoop.hive.ql.plan.TopNKeyDesc.combine(TopNKeyDesc.java:201)
> at 
> org.apache.hadoop.hive.ql.optimizer.topnkey.TopNKeyPushdownProcessor.pushdownThroughGroupBy(TopNKeyPushdownProcessor.java:162)
> at 
> org.apache.hadoop.hive.ql.optimizer.topnkey.TopNKeyPushdownProcessor.pushdown(TopNKeyPushdownProcessor.java:76)
> at 
> 

[jira] [Updated] (HIVE-26000) DirectSQL to prune partitions fails with postgres backend for Skewed-Partition tables

2022-03-02 Thread Naresh P R (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-26000?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Naresh P R updated HIVE-26000:
--
Summary: DirectSQL to prune partitions fails with postgres backend for 
Skewed-Partition tables  (was: DirectSQL to pruning partitions fails with 
postgres backend for Skewed-Partition tables)

> DirectSQL to prune partitions fails with postgres backend for 
> Skewed-Partition tables
> -
>
> Key: HIVE-26000
> URL: https://issues.apache.org/jira/browse/HIVE-26000
> Project: Hive
>  Issue Type: Bug
>Reporter: Naresh P R
>Assignee: Naresh P R
>Priority: Major
>  Labels: pull-request-available
>  Time Spent: 10m
>  Remaining Estimate: 0h
>
>  
>  
> {code:java}
> 2022-03-02 20:37:56,421 INFO  
> org.apache.hadoop.hive.metastore.PartFilterExprUtil: [pool-6-thread-200]: 
> Unable to make the expression tree from expression string [((ds = 
> '2008-04-08') and (UDFToDouble(hr) = 11.0D))]Error parsing partition filter; 
> lexer error: null; exception NoViableAltException(24@[])
> 2022-03-02 20:37:56,593 WARN  org.apache.hadoop.hive.metastore.ObjectStore: 
> [pool-6-thread-200]: Falling back to ORM path due to direct SQL failure (this 
> is not an error): Error executing SQL query "select 
> "SKEWED_COL_VALUE_LOC_MAP"."SD_ID", 
> "SKEWED_STRING_LIST_VALUES".STRING_LIST_ID, 
> "SKEWED_COL_VALUE_LOC_MAP"."LOCATION", 
> "SKEWED_STRING_LIST_VALUES"."STRING_LIST_VALUE" from 
> "SKEWED_COL_VALUE_LOC_MAP"  left outer join "SKEWED_STRING_LIST_VALUES" on 
> "SKEWED_COL_VALUE_LOC_MAP"."STRING_LIST_ID_KID" = 
> "SKEWED_STRING_LIST_VALUES"."STRING_LIST_ID" where 
> "SKEWED_COL_VALUE_LOC_MAP"."SD_ID" in (51010)  and 
> "SKEWED_COL_VALUE_LOC_MAP"."STRING_LIST_ID_KID" is not null order by 
> "SKEWED_COL_VALUE_LOC_MAP"."SD_ID" asc,  
> "SKEWED_STRING_LIST_VALUES"."STRING_LIST_ID" asc,  
> "SKEWED_STRING_LIST_VALUES"."INTEGER_IDX" asc". at 
> org.datanucleus.api.jdo.NucleusJDOHelper.getJDOExceptionForNucleusException(NucleusJDOHelper.java:543)
>  at org.datanucleus.api.jdo.JDOQuery.executeInternal(JDOQuery.java:391) at 
> org.datanucleus.api.jdo.JDOQuery.execute(JDOQuery.java:216) at 
> org.apache.hadoop.hive.metastore.MetastoreDirectSqlUtils.loopJoinOrderedResult(MetastoreDirectSqlUtils.java:131)
>  at 
> org.apache.hadoop.hive.metastore.MetastoreDirectSqlUtils.loopJoinOrderedResult(MetastoreDirectSqlUtils.java:109)
>  at 
> org.apache.hadoop.hive.metastore.MetastoreDirectSqlUtils.setSkewedColLocationMaps(MetastoreDirectSqlUtils.java:414)
>  at 
> org.apache.hadoop.hive.metastore.MetaStoreDirectSql.getPartitionsFromPartitionIds(MetaStoreDirectSql.java:967)
>  at 
> org.apache.hadoop.hive.metastore.MetaStoreDirectSql.getPartitionsFromPartitionIds(MetaStoreDirectSql.java:788)
>  at 
> org.apache.hadoop.hive.metastore.MetaStoreDirectSql.access$300(MetaStoreDirectSql.java:117)
>  at 
> org.apache.hadoop.hive.metastore.MetaStoreDirectSql$1.run(MetaStoreDirectSql.java:530)
>  at org.apache.hadoop.hive.metastore.Batchable.runBatched(Batchable.java:73) 
> at 
> org.apache.hadoop.hive.metastore.MetaStoreDirectSql.getPartitionsViaSqlFilter(MetaStoreDirectSql.java:521)
>  at 
> org.apache.hadoop.hive.metastore.ObjectStore$10.getSqlResult(ObjectStore.java:3722);
>  Caused by: ERROR: column SKEWED_STRING_LIST_VALUES.string_list_id does not 
> exist
> {code}
>  



--
This message was sent by Atlassian Jira
(v8.20.1#820001)


[jira] [Updated] (HIVE-26000) DirectSQL to pruning partitions fails with postgres backend for Skewed-Partition tables

2022-03-02 Thread Naresh P R (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-26000?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Naresh P R updated HIVE-26000:
--
Summary: DirectSQL to pruning partitions fails with postgres backend for 
Skewed-Partition tables  (was: Partition table with Skew columns, DirectSQL to 
pruning partitions fails with Postgres backend)

> DirectSQL to pruning partitions fails with postgres backend for 
> Skewed-Partition tables
> ---
>
> Key: HIVE-26000
> URL: https://issues.apache.org/jira/browse/HIVE-26000
> Project: Hive
>  Issue Type: Bug
>Reporter: Naresh P R
>Assignee: Naresh P R
>Priority: Major
>
>  
>  
> {code:java}
> 2022-03-02 20:37:56,421 INFO  
> org.apache.hadoop.hive.metastore.PartFilterExprUtil: [pool-6-thread-200]: 
> Unable to make the expression tree from expression string [((ds = 
> '2008-04-08') and (UDFToDouble(hr) = 11.0D))]Error parsing partition filter; 
> lexer error: null; exception NoViableAltException(24@[])
> 2022-03-02 20:37:56,593 WARN  org.apache.hadoop.hive.metastore.ObjectStore: 
> [pool-6-thread-200]: Falling back to ORM path due to direct SQL failure (this 
> is not an error): Error executing SQL query "select 
> "SKEWED_COL_VALUE_LOC_MAP"."SD_ID", 
> "SKEWED_STRING_LIST_VALUES".STRING_LIST_ID, 
> "SKEWED_COL_VALUE_LOC_MAP"."LOCATION", 
> "SKEWED_STRING_LIST_VALUES"."STRING_LIST_VALUE" from 
> "SKEWED_COL_VALUE_LOC_MAP"  left outer join "SKEWED_STRING_LIST_VALUES" on 
> "SKEWED_COL_VALUE_LOC_MAP"."STRING_LIST_ID_KID" = 
> "SKEWED_STRING_LIST_VALUES"."STRING_LIST_ID" where 
> "SKEWED_COL_VALUE_LOC_MAP"."SD_ID" in (51010)  and 
> "SKEWED_COL_VALUE_LOC_MAP"."STRING_LIST_ID_KID" is not null order by 
> "SKEWED_COL_VALUE_LOC_MAP"."SD_ID" asc,  
> "SKEWED_STRING_LIST_VALUES"."STRING_LIST_ID" asc,  
> "SKEWED_STRING_LIST_VALUES"."INTEGER_IDX" asc". at 
> org.datanucleus.api.jdo.NucleusJDOHelper.getJDOExceptionForNucleusException(NucleusJDOHelper.java:543)
>  at org.datanucleus.api.jdo.JDOQuery.executeInternal(JDOQuery.java:391) at 
> org.datanucleus.api.jdo.JDOQuery.execute(JDOQuery.java:216) at 
> org.apache.hadoop.hive.metastore.MetastoreDirectSqlUtils.loopJoinOrderedResult(MetastoreDirectSqlUtils.java:131)
>  at 
> org.apache.hadoop.hive.metastore.MetastoreDirectSqlUtils.loopJoinOrderedResult(MetastoreDirectSqlUtils.java:109)
>  at 
> org.apache.hadoop.hive.metastore.MetastoreDirectSqlUtils.setSkewedColLocationMaps(MetastoreDirectSqlUtils.java:414)
>  at 
> org.apache.hadoop.hive.metastore.MetaStoreDirectSql.getPartitionsFromPartitionIds(MetaStoreDirectSql.java:967)
>  at 
> org.apache.hadoop.hive.metastore.MetaStoreDirectSql.getPartitionsFromPartitionIds(MetaStoreDirectSql.java:788)
>  at 
> org.apache.hadoop.hive.metastore.MetaStoreDirectSql.access$300(MetaStoreDirectSql.java:117)
>  at 
> org.apache.hadoop.hive.metastore.MetaStoreDirectSql$1.run(MetaStoreDirectSql.java:530)
>  at org.apache.hadoop.hive.metastore.Batchable.runBatched(Batchable.java:73) 
> at 
> org.apache.hadoop.hive.metastore.MetaStoreDirectSql.getPartitionsViaSqlFilter(MetaStoreDirectSql.java:521)
>  at 
> org.apache.hadoop.hive.metastore.ObjectStore$10.getSqlResult(ObjectStore.java:3722);
>  Caused by: ERROR: column SKEWED_STRING_LIST_VALUES.string_list_id does not 
> exist
> {code}
>  



--
This message was sent by Atlassian Jira
(v8.20.1#820001)


[jira] [Assigned] (HIVE-26000) Partition table with Skew columns, DirectSQL to pruning partitions fails with Postgres backend

2022-03-02 Thread Naresh P R (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-26000?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Naresh P R reassigned HIVE-26000:
-


> Partition table with Skew columns, DirectSQL to pruning partitions fails with 
> Postgres backend
> --
>
> Key: HIVE-26000
> URL: https://issues.apache.org/jira/browse/HIVE-26000
> Project: Hive
>  Issue Type: Bug
>Reporter: Naresh P R
>Assignee: Naresh P R
>Priority: Major
>
>  
>  
> {code:java}
> 2022-03-02 20:37:56,421 INFO  
> org.apache.hadoop.hive.metastore.PartFilterExprUtil: [pool-6-thread-200]: 
> Unable to make the expression tree from expression string [((ds = 
> '2008-04-08') and (UDFToDouble(hr) = 11.0D))]Error parsing partition filter; 
> lexer error: null; exception NoViableAltException(24@[])
> 2022-03-02 20:37:56,593 WARN  org.apache.hadoop.hive.metastore.ObjectStore: 
> [pool-6-thread-200]: Falling back to ORM path due to direct SQL failure (this 
> is not an error): Error executing SQL query "select 
> "SKEWED_COL_VALUE_LOC_MAP"."SD_ID", 
> "SKEWED_STRING_LIST_VALUES".STRING_LIST_ID, 
> "SKEWED_COL_VALUE_LOC_MAP"."LOCATION", 
> "SKEWED_STRING_LIST_VALUES"."STRING_LIST_VALUE" from 
> "SKEWED_COL_VALUE_LOC_MAP"  left outer join "SKEWED_STRING_LIST_VALUES" on 
> "SKEWED_COL_VALUE_LOC_MAP"."STRING_LIST_ID_KID" = 
> "SKEWED_STRING_LIST_VALUES"."STRING_LIST_ID" where 
> "SKEWED_COL_VALUE_LOC_MAP"."SD_ID" in (51010)  and 
> "SKEWED_COL_VALUE_LOC_MAP"."STRING_LIST_ID_KID" is not null order by 
> "SKEWED_COL_VALUE_LOC_MAP"."SD_ID" asc,  
> "SKEWED_STRING_LIST_VALUES"."STRING_LIST_ID" asc,  
> "SKEWED_STRING_LIST_VALUES"."INTEGER_IDX" asc". at 
> org.datanucleus.api.jdo.NucleusJDOHelper.getJDOExceptionForNucleusException(NucleusJDOHelper.java:543)
>  at org.datanucleus.api.jdo.JDOQuery.executeInternal(JDOQuery.java:391) at 
> org.datanucleus.api.jdo.JDOQuery.execute(JDOQuery.java:216) at 
> org.apache.hadoop.hive.metastore.MetastoreDirectSqlUtils.loopJoinOrderedResult(MetastoreDirectSqlUtils.java:131)
>  at 
> org.apache.hadoop.hive.metastore.MetastoreDirectSqlUtils.loopJoinOrderedResult(MetastoreDirectSqlUtils.java:109)
>  at 
> org.apache.hadoop.hive.metastore.MetastoreDirectSqlUtils.setSkewedColLocationMaps(MetastoreDirectSqlUtils.java:414)
>  at 
> org.apache.hadoop.hive.metastore.MetaStoreDirectSql.getPartitionsFromPartitionIds(MetaStoreDirectSql.java:967)
>  at 
> org.apache.hadoop.hive.metastore.MetaStoreDirectSql.getPartitionsFromPartitionIds(MetaStoreDirectSql.java:788)
>  at 
> org.apache.hadoop.hive.metastore.MetaStoreDirectSql.access$300(MetaStoreDirectSql.java:117)
>  at 
> org.apache.hadoop.hive.metastore.MetaStoreDirectSql$1.run(MetaStoreDirectSql.java:530)
>  at org.apache.hadoop.hive.metastore.Batchable.runBatched(Batchable.java:73) 
> at 
> org.apache.hadoop.hive.metastore.MetaStoreDirectSql.getPartitionsViaSqlFilter(MetaStoreDirectSql.java:521)
>  at 
> org.apache.hadoop.hive.metastore.ObjectStore$10.getSqlResult(ObjectStore.java:3722);
>  Caused by: ERROR: column SKEWED_STRING_LIST_VALUES.string_list_id does not 
> exist
> {code}
>  



--
This message was sent by Atlassian Jira
(v8.20.1#820001)


[jira] [Updated] (HIVE-25919) CCE when validating AND PPD in HBaseStorageHandler

2022-02-01 Thread Naresh P R (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-25919?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Naresh P R updated HIVE-25919:
--
Description: 
After HIVE-13815, 
[this|https://github.com/apache/calcite/blob/branch-1.10/core/src/main/java/org/apache/calcite/rex/RexUtil.java#L1797-L1810]
 change removes (EQUALS true) from the predicate expression.

An AND condition with boolean=true throws a CCE here because of the above change.
[https://github.com/apache/hive/blob/master/ql/src/java/org/apache/hadoop/hive/ql/index/IndexPredicateAnalyzer.java#L163]
{code:java}
ClassCastException org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc cannot be 
cast to org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc
java.lang.ClassCastException: org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc 
cannot be cast to org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc
at 
org.apache.hadoop.hive.ql.index.IndexPredicateAnalyzer$1.process(IndexPredicateAnalyzer.java:163)
at 
org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher.dispatch(DefaultRuleDispatcher.java:90)
at 
org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.dispatchAndReturn(DefaultGraphWalker.java:105)
at 
org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.dispatch(DefaultGraphWalker.java:89)
at 
org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.walk(DefaultGraphWalker.java:178)
at 
org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.startWalking(DefaultGraphWalker.java:120)
at 
org.apache.hadoop.hive.ql.index.IndexPredicateAnalyzer.analyzePredicate(IndexPredicateAnalyzer.java:174)
at 
org.apache.hadoop.hive.hbase.HBaseStorageHandler.decomposePredicate(HBaseStorageHandler.java:415)
 {code}
Attached a repro .q file.

mvn test -Dtest=TestHBaseCliDriver -Dqfile=test.q -Dtest.output.overwrite 
-DskipSparkTests -pl itests/qtest -Pitests

  was:
After HIVE-13815, 
[this|https://github.com/apache/calcite/blob/branch-1.10/core/src/main/java/org/apache/calcite/rex/RexUtil.java#L1797-L1810]
 change is removing (EQUALS true) from predicate expression.

 

AND Condition with boolean=true is throwing CCE at here because of above change.
[https://github.com/apache/hive/blob/master/ql/src/java/org/apache/hadoop/hive/ql/index/IndexPredicateAnalyzer.java#L163]

 
{code:java}
ClassCastException org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc cannot be 
cast to org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc
java.lang.ClassCastException: org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc 
cannot be cast to org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc
at 
org.apache.hadoop.hive.ql.index.IndexPredicateAnalyzer$1.process(IndexPredicateAnalyzer.java:163)
at 
org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher.dispatch(DefaultRuleDispatcher.java:90)
at 
org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.dispatchAndReturn(DefaultGraphWalker.java:105)
at 
org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.dispatch(DefaultGraphWalker.java:89)
at 
org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.walk(DefaultGraphWalker.java:178)
at 
org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.startWalking(DefaultGraphWalker.java:120)
at 
org.apache.hadoop.hive.ql.index.IndexPredicateAnalyzer.analyzePredicate(IndexPredicateAnalyzer.java:174)
at 
org.apache.hadoop.hive.hbase.HBaseStorageHandler.decomposePredicate(HBaseStorageHandler.java:415)
 {code}


> CCE when validating AND PPD in HBaseStorageHandler 
> ---
>
> Key: HIVE-25919
> URL: https://issues.apache.org/jira/browse/HIVE-25919
> Project: Hive
>  Issue Type: Bug
>Reporter: Naresh P R
>Priority: Major
> Attachments: test.q
>
>
> After HIVE-13815, 
> [this|https://github.com/apache/calcite/blob/branch-1.10/core/src/main/java/org/apache/calcite/rex/RexUtil.java#L1797-L1810]
>  change is removing (EQUALS true) from predicate expression.
> AND Condition with boolean=true is throwing CCE at here because of above 
> change.
> [https://github.com/apache/hive/blob/master/ql/src/java/org/apache/hadoop/hive/ql/index/IndexPredicateAnalyzer.java#L163]
> {code:java}
> ClassCastException org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc cannot 
> be cast to org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc
> java.lang.ClassCastException: 
> org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc cannot be cast to 
> org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc
> at 
> org.apache.hadoop.hive.ql.index.IndexPredicateAnalyzer$1.process(IndexPredicateAnalyzer.java:163)
> at 
> org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher.dispatch(DefaultRuleDispatcher.java:90)
> at 
> org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.dispatchAndReturn(DefaultGraphWalker.java:105)
> at 
> 

[jira] [Updated] (HIVE-25919) CCE when validating AND PPD in HBaseStorageHandler

2022-02-01 Thread Naresh P R (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-25919?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Naresh P R updated HIVE-25919:
--
Attachment: test.q

> CCE when validating AND PPD in HBaseStorageHandler 
> ---
>
> Key: HIVE-25919
> URL: https://issues.apache.org/jira/browse/HIVE-25919
> Project: Hive
>  Issue Type: Bug
>Reporter: Naresh P R
>Priority: Major
> Attachments: test.q
>
>
> After HIVE-13815, 
> [this|https://github.com/apache/calcite/blob/branch-1.10/core/src/main/java/org/apache/calcite/rex/RexUtil.java#L1797-L1810]
>  change is removing (EQUALS true) from predicate expression.
>  
> AND Condition with boolean=true is throwing CCE at here because of above 
> change.
> [https://github.com/apache/hive/blob/master/ql/src/java/org/apache/hadoop/hive/ql/index/IndexPredicateAnalyzer.java#L163]
>  
> {code:java}
> ClassCastException org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc cannot 
> be cast to org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc
> java.lang.ClassCastException: 
> org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc cannot be cast to 
> org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc
> at 
> org.apache.hadoop.hive.ql.index.IndexPredicateAnalyzer$1.process(IndexPredicateAnalyzer.java:163)
> at 
> org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher.dispatch(DefaultRuleDispatcher.java:90)
> at 
> org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.dispatchAndReturn(DefaultGraphWalker.java:105)
> at 
> org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.dispatch(DefaultGraphWalker.java:89)
> at 
> org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.walk(DefaultGraphWalker.java:178)
> at 
> org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.startWalking(DefaultGraphWalker.java:120)
> at 
> org.apache.hadoop.hive.ql.index.IndexPredicateAnalyzer.analyzePredicate(IndexPredicateAnalyzer.java:174)
> at 
> org.apache.hadoop.hive.hbase.HBaseStorageHandler.decomposePredicate(HBaseStorageHandler.java:415)
>  {code}



--
This message was sent by Atlassian Jira
(v8.20.1#820001)


[jira] [Updated] (HIVE-25419) HMS Client/Server filter fails with StorageBasedAuthorizationProvider

2021-08-04 Thread Naresh P R (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-25419?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Naresh P R updated HIVE-25419:
--
Description: 
hive.security.metastore.authorization.manager=org.apache.hadoop.hive.ql.security.authorization.StorageBasedAuthorizationProvider
 
hive.security.authorization.manager=org.apache.hadoop.hive.ql.security.authorization.StorageBasedAuthorizationProvider
 hive.metastore.client.filter.enabled=true

hive.metastore.server.filter.enabled=true

"Show tables" fails at here

[https://github.com/apache/hive/blob/master/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveUtils.java#L378-L379]

With the following exception:
{code:java}
2021-07-29 17:09:12,278 ERROR 
org.apache.hadoop.hive.metastore.RetryingHMSHandler: [pool-14-thread-5]: 
MetaException(message:Error in 
HiveMetaStoreAuthorizer.filterTables()java.lang.RuntimeException: class 
org.apache.hadoop.hive.ql.security.authorization.StorageBasedAuthorizationProvider
 not 
org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthorizerFactory) 
at 
org.apache.hadoop.hive.ql.security.authorization.plugin.metastore.HiveMetaStoreAuthorizer.filterTableNames(HiveMetaStoreAuthorizer.java:333)
 
at 
org.apache.hadoop.hive.ql.security.authorization.plugin.metastore.HiveMetaStoreAuthorizer.filterTableNames(HiveMetaStoreAuthorizer.java:163)
 
at 
org.apache.hadoop.hive.metastore.utils.FilterUtils.filterTableNamesIfEnabled(FilterUtils.java:109)
 
at 
org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.get_tables_by_type(HiveMetaStore.java:5905){code}

  was:
hive.security.metastore.authorization.manager=org.apache.hadoop.hive.ql.security.authorization.StorageBasedAuthorizationProvider
hive.security.authorization.manager=org.apache.hadoop.hive.ql.security.authorization.StorageBasedAuthorizationProvider
hive.metastore.client.filter.enabled=true

"Show tables" fails at here

[https://github.com/apache/hive/blob/master/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveUtils.java#L378-L379]

With following exception:
{code:java}
2021-07-29 17:09:12,278 ERROR 
org.apache.hadoop.hive.metastore.RetryingHMSHandler: [pool-14-thread-5]: 
MetaException(message:Error in 
HiveMetaStoreAuthorizer.filterTables()java.lang.RuntimeException: class 
org.apache.hadoop.hive.ql.security.authorization.StorageBasedAuthorizationProvider
 not 
org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthorizerFactory) 
at 
org.apache.hadoop.hive.ql.security.authorization.plugin.metastore.HiveMetaStoreAuthorizer.filterTableNames(HiveMetaStoreAuthorizer.java:333)
 
at 
org.apache.hadoop.hive.ql.security.authorization.plugin.metastore.HiveMetaStoreAuthorizer.filterTableNames(HiveMetaStoreAuthorizer.java:163)
 
at 
org.apache.hadoop.hive.metastore.utils.FilterUtils.filterTableNamesIfEnabled(FilterUtils.java:109)
 
at 
org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.get_tables_by_type(HiveMetaStore.java:5905){code}


> HMS Client/Server filter fails with StorageBasedAuthorizationProvider
> -
>
> Key: HIVE-25419
> URL: https://issues.apache.org/jira/browse/HIVE-25419
> Project: Hive
>  Issue Type: Bug
>Reporter: Naresh P R
>Priority: Major
>
> hive.security.metastore.authorization.manager=org.apache.hadoop.hive.ql.security.authorization.StorageBasedAuthorizationProvider
>  
> hive.security.authorization.manager=org.apache.hadoop.hive.ql.security.authorization.StorageBasedAuthorizationProvider
>  hive.metastore.client.filter.enabled=true
> hive.metastore.server.filter.enabled=true
> "Show tables" fails at here
> [https://github.com/apache/hive/blob/master/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveUtils.java#L378-L379]
> With following exception:
> {code:java}
> 2021-07-29 17:09:12,278 ERROR 
> org.apache.hadoop.hive.metastore.RetryingHMSHandler: [pool-14-thread-5]: 
> MetaException(message:Error in 
> HiveMetaStoreAuthorizer.filterTables()java.lang.RuntimeException: class 
> org.apache.hadoop.hive.ql.security.authorization.StorageBasedAuthorizationProvider
>  not 
> org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthorizerFactory)
>  
> at 
> org.apache.hadoop.hive.ql.security.authorization.plugin.metastore.HiveMetaStoreAuthorizer.filterTableNames(HiveMetaStoreAuthorizer.java:333)
>  
> at 
> org.apache.hadoop.hive.ql.security.authorization.plugin.metastore.HiveMetaStoreAuthorizer.filterTableNames(HiveMetaStoreAuthorizer.java:163)
>  
> at 
> org.apache.hadoop.hive.metastore.utils.FilterUtils.filterTableNamesIfEnabled(FilterUtils.java:109)
>  
> at 
> org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.get_tables_by_type(HiveMetaStore.java:5905){code}



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Updated] (HIVE-25419) HMS Client/Server filter fails with StorageBasedAuthorizationProvider

2021-08-04 Thread Naresh P R (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-25419?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Naresh P R updated HIVE-25419:
--
Summary: HMS Client/Server filter fails with 
StorageBasedAuthorizationProvider  (was: HMS Client filter fails with 
StorageBasedAuthorizationProvider)

> HMS Client/Server filter fails with StorageBasedAuthorizationProvider
> -
>
> Key: HIVE-25419
> URL: https://issues.apache.org/jira/browse/HIVE-25419
> Project: Hive
>  Issue Type: Bug
>Reporter: Naresh P R
>Priority: Major
>
> hive.security.metastore.authorization.manager=org.apache.hadoop.hive.ql.security.authorization.StorageBasedAuthorizationProvider
> hive.security.authorization.manager=org.apache.hadoop.hive.ql.security.authorization.StorageBasedAuthorizationProvider
> hive.metastore.client.filter.enabled=true
> "Show tables" fails at here
> [https://github.com/apache/hive/blob/master/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveUtils.java#L378-L379]
> With following exception:
> {code:java}
> 2021-07-29 17:09:12,278 ERROR 
> org.apache.hadoop.hive.metastore.RetryingHMSHandler: [pool-14-thread-5]: 
> MetaException(message:Error in 
> HiveMetaStoreAuthorizer.filterTables()java.lang.RuntimeException: class 
> org.apache.hadoop.hive.ql.security.authorization.StorageBasedAuthorizationProvider
>  not 
> org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthorizerFactory)
>  
> at 
> org.apache.hadoop.hive.ql.security.authorization.plugin.metastore.HiveMetaStoreAuthorizer.filterTableNames(HiveMetaStoreAuthorizer.java:333)
>  
> at 
> org.apache.hadoop.hive.ql.security.authorization.plugin.metastore.HiveMetaStoreAuthorizer.filterTableNames(HiveMetaStoreAuthorizer.java:163)
>  
> at 
> org.apache.hadoop.hive.metastore.utils.FilterUtils.filterTableNamesIfEnabled(FilterUtils.java:109)
>  
> at 
> org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.get_tables_by_type(HiveMetaStore.java:5905){code}



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Resolved] (HIVE-25338) AIOBE in conv UDF if input is empty

2021-07-23 Thread Naresh P R (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-25338?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Naresh P R resolved HIVE-25338.
---
Fix Version/s: 4.0.0
   Resolution: Fixed

> AIOBE in conv UDF if input is empty
> ---
>
> Key: HIVE-25338
> URL: https://issues.apache.org/jira/browse/HIVE-25338
> Project: Hive
>  Issue Type: Bug
>Reporter: Naresh P R
>Assignee: Naresh P R
>Priority: Major
>  Labels: pull-request-available
> Fix For: 4.0.0
>
>  Time Spent: 50m
>  Remaining Estimate: 0h
>
> Repro
> {code:java}
> create table test (a string);
> insert into test values ("");
> select conv(a,16,10) from test;{code}
> Exception trace:
> {code:java}
> Caused by: java.lang.ArrayIndexOutOfBoundsException: 0
>  at org.apache.hadoop.hive.ql.udf.UDFConv.evaluate(UDFConv.java:160){code}



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Commented] (HIVE-25338) AIOBE in conv UDF if input is empty

2021-07-23 Thread Naresh P R (Jira)


[ 
https://issues.apache.org/jira/browse/HIVE-25338?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=17385974#comment-17385974
 ] 

Naresh P R commented on HIVE-25338:
---

Thanks for the review & merge [~maheshk114]

> AIOBE in conv UDF if input is empty
> ---
>
> Key: HIVE-25338
> URL: https://issues.apache.org/jira/browse/HIVE-25338
> Project: Hive
>  Issue Type: Bug
>Reporter: Naresh P R
>Assignee: Naresh P R
>Priority: Major
>  Labels: pull-request-available
>  Time Spent: 50m
>  Remaining Estimate: 0h
>
> Repro
> {code:java}
> create table test (a string);
> insert into test values ("");
> select conv(a,16,10) from test;{code}
> Exception trace:
> {code:java}
> Caused by: java.lang.ArrayIndexOutOfBoundsException: 0
>  at org.apache.hadoop.hive.ql.udf.UDFConv.evaluate(UDFConv.java:160){code}



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Commented] (HIVE-25338) AIOBE in conv UDF if input is empty

2021-07-21 Thread Naresh P R (Jira)


[ 
https://issues.apache.org/jira/browse/HIVE-25338?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=17385252#comment-17385252
 ] 

Naresh P R commented on HIVE-25338:
---

Thanks for the analysis [~zabetak]

I updated the PR to return null for empty input.

> AIOBE in conv UDF if input is empty
> ---
>
> Key: HIVE-25338
> URL: https://issues.apache.org/jira/browse/HIVE-25338
> Project: Hive
>  Issue Type: Bug
>Reporter: Naresh P R
>Assignee: Naresh P R
>Priority: Major
>  Labels: pull-request-available
>  Time Spent: 40m
>  Remaining Estimate: 0h
>
> Repro
> {code:java}
> create table test (a string);
> insert into test values ("");
> select conv(a,16,10) from test;{code}
> Exception trace:
> {code:java}
> Caused by: java.lang.ArrayIndexOutOfBoundsException: 0
>  at org.apache.hadoop.hive.ql.udf.UDFConv.evaluate(UDFConv.java:160){code}



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Commented] (HIVE-25338) AIOBE in conv UDF if input is empty

2021-07-16 Thread Naresh P R (Jira)


[ 
https://issues.apache.org/jira/browse/HIVE-25338?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=17382178#comment-17382178
 ] 

Naresh P R commented on HIVE-25338:
---

[~zabetak] We return 0 for invalid input. I can return the same for empty input 
as well.

> AIOBE in conv UDF if input is empty
> ---
>
> Key: HIVE-25338
> URL: https://issues.apache.org/jira/browse/HIVE-25338
> Project: Hive
>  Issue Type: Bug
>Reporter: Naresh P R
>Assignee: Naresh P R
>Priority: Major
>  Labels: pull-request-available
>  Time Spent: 10m
>  Remaining Estimate: 0h
>
> Repro
> {code:java}
> create table test (a string);
> insert into test values ("");
> select conv(a,16,10) from test;{code}
> Exception trace:
> {code:java}
> Caused by: java.lang.ArrayIndexOutOfBoundsException: 0
>  at org.apache.hadoop.hive.ql.udf.UDFConv.evaluate(UDFConv.java:160){code}



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Updated] (HIVE-25338) AIOBE in conv UDF if input is empty

2021-07-16 Thread Naresh P R (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-25338?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Naresh P R updated HIVE-25338:
--
Issue Type: Bug  (was: New Feature)

> AIOBE in conv UDF if input is empty
> ---
>
> Key: HIVE-25338
> URL: https://issues.apache.org/jira/browse/HIVE-25338
> Project: Hive
>  Issue Type: Bug
>Reporter: Naresh P R
>Assignee: Naresh P R
>Priority: Major
>  Labels: pull-request-available
>  Time Spent: 10m
>  Remaining Estimate: 0h
>
> Repro
> {code:java}
> create table test (a string);
> insert into test values ("");
> select conv(a,16,10) from test;{code}
> Exception trace:
> {code:java}
> Caused by: java.lang.ArrayIndexOutOfBoundsException: 0
>  at org.apache.hadoop.hive.ql.udf.UDFConv.evaluate(UDFConv.java:160){code}



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Assigned] (HIVE-25338) AIOBE in conv UDF if input is empty

2021-07-16 Thread Naresh P R (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-25338?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Naresh P R reassigned HIVE-25338:
-


> AIOBE in conv UDF if input is empty
> ---
>
> Key: HIVE-25338
> URL: https://issues.apache.org/jira/browse/HIVE-25338
> Project: Hive
>  Issue Type: New Feature
>Reporter: Naresh P R
>Assignee: Naresh P R
>Priority: Major
>
> Repro
> {code:java}
> create table test (a string);
> insert into test values ("");
> select conv(a,16,10) from test;{code}
> Exception trace:
> {code:java}
> Caused by: java.lang.ArrayIndexOutOfBoundsException: 0
>  at org.apache.hadoop.hive.ql.udf.UDFConv.evaluate(UDFConv.java:160){code}



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Resolved] (HIVE-25198) CTAS external table with camelcase and HMS translation ON is returning 0 records

2021-06-04 Thread Naresh P R (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-25198?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Naresh P R resolved HIVE-25198.
---
Resolution: Duplicate

> CTAS external table with camelcase and HMS translation ON is returning 0 
> records
> 
>
> Key: HIVE-25198
> URL: https://issues.apache.org/jira/browse/HIVE-25198
> Project: Hive
>  Issue Type: Bug
>Reporter: Naresh P R
>Assignee: Naresh P R
>Priority: Major
>  Labels: pull-request-available
>  Time Spent: 20m
>  Remaining Estimate: 0h
>
> create external table TarGet as select * from source;
> Above query creates tableLocation with CamelCase if HMS Translation is ON, 
> whereas MoveTask will use lowerCase table path.
> eg., 
> {code:java}
> ==> Desc formatted target <==
> Location:  hdfs:///warehouse/tablespace/external/hive/test.db/TarGet
> ==> MoveTask <==
> INFO : Moving data to directory 
> hdfs:///warehouse/tablespace/external/hive/test.db/target from 
> hdfs:///warehouse/tablespace/external/hive/test.db/.hive-staging_hive_2021-06-04_03-02-36_272_669287187808252905-12/-ext-10002
> ==> HMS Translation <==
> 2021-06-04 03:02:45,772 INFO  
> org.apache.hadoop.hive.metastore.MetastoreDefaultTransformer: 
> [pool-7-thread-8]: Transformer returning table:Table(tableName:TarGet, 
> dbName:test, owner:hive, createTime:1622775765, lastAccessTime:0, 
> retention:0, sd:StorageDescriptor(cols:[FieldSchema(name:id, 
> type:varchar(10), comment:null)], location: 
> hdfs:///warehouse/tablespace/external/hive/test.db/TarGet,{code}
> After CTAS, Select query on target table will return 0 rows.
>  



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Commented] (HIVE-25198) CTAS external table with camelcase and HMS translation ON is returning 0 records

2021-06-04 Thread Naresh P R (Jira)


[ 
https://issues.apache.org/jira/browse/HIVE-25198?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=17357461#comment-17357461
 ] 

Naresh P R commented on HIVE-25198:
---

Thanks [~Rajkumar Singh], I will close it as a duplicate.

> CTAS external table with camelcase and HMS translation ON is returning 0 
> records
> 
>
> Key: HIVE-25198
> URL: https://issues.apache.org/jira/browse/HIVE-25198
> Project: Hive
>  Issue Type: Bug
>Reporter: Naresh P R
>Assignee: Naresh P R
>Priority: Major
>  Labels: pull-request-available
>  Time Spent: 20m
>  Remaining Estimate: 0h
>
> create external table TarGet as select * from source;
> Above query creates tableLocation with CamelCase if HMS Translation is ON, 
> whereas MoveTask will use lowerCase table path.
> eg., 
> {code:java}
> ==> Desc formatted target <==
> Location:  hdfs:///warehouse/tablespace/external/hive/test.db/TarGet
> ==> MoveTask <==
> INFO : Moving data to directory 
> hdfs:///warehouse/tablespace/external/hive/test.db/target from 
> hdfs:///warehouse/tablespace/external/hive/test.db/.hive-staging_hive_2021-06-04_03-02-36_272_669287187808252905-12/-ext-10002
> ==> HMS Translation <==
> 2021-06-04 03:02:45,772 INFO  
> org.apache.hadoop.hive.metastore.MetastoreDefaultTransformer: 
> [pool-7-thread-8]: Transformer returning table:Table(tableName:TarGet, 
> dbName:test, owner:hive, createTime:1622775765, lastAccessTime:0, 
> retention:0, sd:StorageDescriptor(cols:[FieldSchema(name:id, 
> type:varchar(10), comment:null)], location: 
> hdfs:///warehouse/tablespace/external/hive/test.db/TarGet,{code}
> After CTAS, Select query on target table will return 0 rows.
>  



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Updated] (HIVE-25198) CTAS external table with camelcase and HMS translation ON is returning 0 records

2021-06-03 Thread Naresh P R (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-25198?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Naresh P R updated HIVE-25198:
--
Description: 
create external table TarGet as select * from source;

The above query creates the table location in CamelCase if HMS Translation is ON, 
whereas MoveTask uses the lowercase table path.

eg., 
{code:java}
==> Desc formatted target <==
Location:  hdfs:///warehouse/tablespace/external/hive/test.db/TarGet

==> MoveTask <==
INFO : Moving data to directory 
hdfs:///warehouse/tablespace/external/hive/test.db/target from 
hdfs:///warehouse/tablespace/external/hive/test.db/.hive-staging_hive_2021-06-04_03-02-36_272_669287187808252905-12/-ext-10002

==> HMS Translation <==
2021-06-04 03:02:45,772 INFO  
org.apache.hadoop.hive.metastore.MetastoreDefaultTransformer: 
[pool-7-thread-8]: Transformer returning table:Table(tableName:TarGet, 
dbName:test, owner:hive, createTime:1622775765, lastAccessTime:0, retention:0, 
sd:StorageDescriptor(cols:[FieldSchema(name:id, type:varchar(10), 
comment:null)], location: 
hdfs:///warehouse/tablespace/external/hive/test.db/TarGet,{code}
After the CTAS, a SELECT query on the target table returns 0 rows.

 

  was:
create external table TarGet as select * from source;

Above query creates tableLocation with CamelCase if HMS Translation is ON, 
whereas MoveTask will use lowerCase table path.

eg., 

 
{code:java}
==> Desc formatted target <==
Location:  hdfs:///warehouse/tablespace/external/hive/test.db/TarGet

==> MoveTask <==
INFO : Moving data to directory 
hdfs:///warehouse/tablespace/external/hive/test.db/target from 
hdfs:///warehouse/tablespace/external/hive/test.db/.hive-staging_hive_2021-06-04_03-02-36_272_669287187808252905-12/-ext-10002

==> HMS Translation <==
2021-06-04 03:02:45,772 INFO  
org.apache.hadoop.hive.metastore.MetastoreDefaultTransformer: 
[pool-7-thread-8]: Transformer returning table:Table(tableName:TarGet, 
dbName:test, owner:hive, createTime:1622775765, lastAccessTime:0, retention:0, 
sd:StorageDescriptor(cols:[FieldSchema(name:id, type:varchar(10), 
comment:null)], location: 
hdfs:///warehouse/tablespace/external/hive/ajay.db/TarGet,{code}
 

Select query after CTAS will return 0 rows because of this.

 


> CTAS external table with camelcase and HMS translation ON is returning 0 
> records
> 
>
> Key: HIVE-25198
> URL: https://issues.apache.org/jira/browse/HIVE-25198
> Project: Hive
>  Issue Type: Bug
>Reporter: Naresh P R
>Assignee: Naresh P R
>Priority: Major
>
> create external table TarGet as select * from source;
> Above query creates tableLocation with CamelCase if HMS Translation is ON, 
> whereas MoveTask will use lowerCase table path.
> eg., 
> {code:java}
> ==> Desc formatted target <==
> Location:  hdfs:///warehouse/tablespace/external/hive/test.db/TarGet
> ==> MoveTask <==
> INFO : Moving data to directory 
> hdfs:///warehouse/tablespace/external/hive/test.db/target from 
> hdfs:///warehouse/tablespace/external/hive/test.db/.hive-staging_hive_2021-06-04_03-02-36_272_669287187808252905-12/-ext-10002
> ==> HMS Translation <==
> 2021-06-04 03:02:45,772 INFO  
> org.apache.hadoop.hive.metastore.MetastoreDefaultTransformer: 
> [pool-7-thread-8]: Transformer returning table:Table(tableName:TarGet, 
> dbName:test, owner:hive, createTime:1622775765, lastAccessTime:0, 
> retention:0, sd:StorageDescriptor(cols:[FieldSchema(name:id, 
> type:varchar(10), comment:null)], location: 
> hdfs:///warehouse/tablespace/external/hive/test.db/TarGet,{code}
> After CTAS, Select query on target table will return 0 rows.
>  



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Updated] (HIVE-25198) CTAS external table with camelcase and HMS translation ON is returning 0 records

2021-06-03 Thread Naresh P R (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-25198?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Naresh P R updated HIVE-25198:
--
Summary: CTAS external table with camelcase and HMS translation ON is 
returning 0 records  (was: CTAS external table with camelcase & HMS translation 
ON is returning 0 records)

> CTAS external table with camelcase and HMS translation ON is returning 0 
> records
> 
>
> Key: HIVE-25198
> URL: https://issues.apache.org/jira/browse/HIVE-25198
> Project: Hive
>  Issue Type: Bug
>Reporter: Naresh P R
>Assignee: Naresh P R
>Priority: Major
>
> create external table TarGet as select * from source;
> Above query creates tableLocation with CamelCase if HMS Translation is ON, 
> whereas MoveTask will use lowerCase table path.
> eg., 
>  
> {code:java}
> ==> Desc formatted target <==
> Location:  hdfs:///warehouse/tablespace/external/hive/test.db/TarGet
> ==> MoveTask <==
> INFO : Moving data to directory 
> hdfs:///warehouse/tablespace/external/hive/test.db/target from 
> hdfs:///warehouse/tablespace/external/hive/test.db/.hive-staging_hive_2021-06-04_03-02-36_272_669287187808252905-12/-ext-10002
> ==> HMS Translation <==
> 2021-06-04 03:02:45,772 INFO  
> org.apache.hadoop.hive.metastore.MetastoreDefaultTransformer: 
> [pool-7-thread-8]: Transformer returning table:Table(tableName:TarGet, 
> dbName:test, owner:hive, createTime:1622775765, lastAccessTime:0, 
> retention:0, sd:StorageDescriptor(cols:[FieldSchema(name:id, 
> type:varchar(10), comment:null)], location: 
> hdfs:///warehouse/tablespace/external/hive/ajay.db/TarGet,{code}
>  
> Select query after CTAS will return 0 rows because of this.
>  



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Assigned] (HIVE-25198) CTAS external table with camelcase & HMS translation ON is returning 0 records

2021-06-03 Thread Naresh P R (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-25198?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Naresh P R reassigned HIVE-25198:
-


> CTAS external table with camelcase & HMS translation ON is returning 0 records
> --
>
> Key: HIVE-25198
> URL: https://issues.apache.org/jira/browse/HIVE-25198
> Project: Hive
>  Issue Type: Bug
>Reporter: Naresh P R
>Assignee: Naresh P R
>Priority: Major
>
> create external table TarGet as select * from source;
> Above query creates tableLocation with CamelCase if HMS Translation is ON, 
> whereas MoveTask will use lowerCase table path.
> eg., 
>  
> {code:java}
> ==> Desc formatted target <==
> Location:  hdfs:///warehouse/tablespace/external/hive/test.db/TarGet
> ==> MoveTask <==
> INFO : Moving data to directory 
> hdfs:///warehouse/tablespace/external/hive/test.db/target from 
> hdfs:///warehouse/tablespace/external/hive/test.db/.hive-staging_hive_2021-06-04_03-02-36_272_669287187808252905-12/-ext-10002
> ==> HMS Translation <==
> 2021-06-04 03:02:45,772 INFO  
> org.apache.hadoop.hive.metastore.MetastoreDefaultTransformer: 
> [pool-7-thread-8]: Transformer returning table:Table(tableName:TarGet, 
> dbName:test, owner:hive, createTime:1622775765, lastAccessTime:0, 
> retention:0, sd:StorageDescriptor(cols:[FieldSchema(name:id, 
> type:varchar(10), comment:null)], location: 
> hdfs:///warehouse/tablespace/external/hive/ajay.db/TarGet,{code}
>  
> Select query after CTAS will return 0 rows because of this.
>  



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Updated] (HIVE-25118) CTAS accepts column's with dot(.) if CBO fails

2021-05-14 Thread Naresh P R (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-25118?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Naresh P R updated HIVE-25118:
--
Description: 
create table t1(id int);

create table t2(id int);

create table t3 as select t1.id, t2.id from t1 join t2;

With "hive.stats.column.autogather=true", CBO fails with "SemanticException 
Ambiguous column reference: id", but the CTAS still succeeds with the following table schema:
{code:java}
desc t3;
+-----------+------------+----------+
| col_name  | data_type  | comment  |
+-----------+------------+----------+
| t1.id     | int        |          |
| t2.id     | int        |          |
+-----------+------------+----------+{code}
Disabling column stats makes the query fail, as expected.
{code:java}
set hive.stats.column.autogather=false;
create table t3 as select t1.id, t2.id from t1 join t2;
Error: Error while compiling statement: FAILED: SemanticException [Error 
10036]: Duplicate column name: id (state=42000,code=10036)
{code}
PS: an explicit create table t3(`t1.id` int, `t2.id` int); fails because of the dot (.) in the 
column name.

  was:
create table t1(id int);

create table t2(id int);

create table t3 as select t1.id, t2.id from t1 join t2;

CBO fails if "hive.stats.column.autogather=true" with "SemanticException 
Ambiguous column reference: id" & CTAS passes with following table schema
{code:java}
desc t3;
+---++--+
| col_name  | data_type  | comment  |
+---++--+
| t1.id | int|  |
| t2.id | int|  |
+---++--+{code}
disabling column stats will fail the query
{code:java}
set hive.stats.column.autogather=false;
create table t3 as select t1.id, t2.id from t1 join t2;
Error: Error while compiling statement: FAILED: SemanticException [Error 
10036]: Duplicate column name: id (state=42000,code=10036)
{code}
PS : create table t3(`t1.id` int, `t2.id` int); will fail for dot(.) in column 
name.


> CTAS accepts column's with dot(.) if CBO fails
> --
>
> Key: HIVE-25118
> URL: https://issues.apache.org/jira/browse/HIVE-25118
> Project: Hive
>  Issue Type: Bug
>Reporter: Naresh P R
>Priority: Major
>
> create table t1(id int);
> create table t2(id int);
> create table t3 as select t1.id, t2.id from t1 join t2;
> CBO fails if "hive.stats.column.autogather=true" with "SemanticException 
> Ambiguous column reference: id" & CTAS passes with following table schema
> {code:java}
> desc t3;
> +---++--+
> | col_name  | data_type  | comment  |
> +---++--+
> | t1.id | int|  |
> | t2.id | int|  |
> +---++--+{code}
> disabling column stats will fail the query as expected.
> {code:java}
> set hive.stats.column.autogather=false;
> create table t3 as select t1.id, t2.id from t1 join t2;
> Error: Error while compiling statement: FAILED: SemanticException [Error 
> 10036]: Duplicate column name: id (state=42000,code=10036)
> {code}
> PS : create table t3(`t1.id` int, `t2.id` int); will fail for dot(.) in 
> column name.



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Updated] (HIVE-25118) CTAS accepts column's with dot(.) if CBO fails

2021-05-14 Thread Naresh P R (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-25118?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Naresh P R updated HIVE-25118:
--
Description: 
create table t1(id int);

create table t2(id int);

create table t3 as select t1.id, t2.id from t1 join t2;

CBO fails if "hive.stats.column.autogather=true" with "SemanticException 
Ambiguous column reference: id" & CTAS passes with following table schema
{code:java}
desc t3;
+-----------+------------+----------+
| col_name  | data_type  | comment  |
+-----------+------------+----------+
| t1.id     | int        |          |
| t2.id     | int        |          |
+-----------+------------+----------+{code}
disabling column stats will fail the query
{code:java}
set hive.stats.column.autogather=false;
create table t3 as select t1.id, t2.id from t1 join t2;
Error: Error while compiling statement: FAILED: SemanticException [Error 
10036]: Duplicate column name: id (state=42000,code=10036)
{code}
PS : create table t3(`t1.id` int, `t2.id` int); will fail for dot(.) in column 
name.

  was:
create table t1(id int);

create table t2(id int);

create table t3 as select t1.id, t2.id from t1 join t2;

CBO fails if "hive.stats.column.autogather=true" with "SemanticException 
Ambiguous column reference: id" & CTAS passes with following table schema
{code:java}
desc t3;
+---++--+
| col_name  | data_type  | comment  |
+---++--+
| t1.id | int|  |
| t2.id | int|  |
+---++--+{code}
create table t3(`t1.id` int, `t2.id` int); will fail for dot(.) in column name.


> CTAS accepts column's with dot(.) if CBO fails
> --
>
> Key: HIVE-25118
> URL: https://issues.apache.org/jira/browse/HIVE-25118
> Project: Hive
>  Issue Type: Bug
>Reporter: Naresh P R
>Priority: Major
>
> create table t1(id int);
> create table t2(id int);
> create table t3 as select t1.id, t2.id from t1 join t2;
> CBO fails if "hive.stats.column.autogather=true" with "SemanticException 
> Ambiguous column reference: id" & CTAS passes with following table schema
> {code:java}
> desc t3;
> +---++--+
> | col_name  | data_type  | comment  |
> +---++--+
> | t1.id | int|  |
> | t2.id | int|  |
> +---++--+{code}
> disabling column stats will fail the query
> {code:java}
> set hive.stats.column.autogather=false;
> create table t3 as select t1.id, t2.id from t1 join t2;
> Error: Error while compiling statement: FAILED: SemanticException [Error 
> 10036]: Duplicate column name: id (state=42000,code=10036)
> {code}
> PS : create table t3(`t1.id` int, `t2.id` int); will fail for dot(.) in 
> column name.



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Updated] (HIVE-25084) Incorrect aggregate results on bucketed table

2021-04-30 Thread Naresh P R (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-25084?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Naresh P R updated HIVE-25084:
--
Attachment: test4.q

> Incorrect aggregate results on bucketed table
> -
>
> Key: HIVE-25084
> URL: https://issues.apache.org/jira/browse/HIVE-25084
> Project: Hive
>  Issue Type: Bug
>Reporter: Naresh P R
>Priority: Major
> Attachments: test4.q
>
>
> Steps to repro
> {code:java}
> CREATE TABLE test_table(
> col1 int,
> col2 char(32),
> col3 varchar(3))
> CLUSTERED BY (col2)
>  SORTED BY (
>col2 ASC,
>col3 ASC,
>col1 ASC)
>  INTO 32 BUCKETS stored as orc;
> set hive.query.results.cache.enabled=false;
> insert into test_table values(2, "123456", "15");
> insert into test_table values(1, "123456", "15");
> SELECT col2, col3, max(col1) AS max_sequence FROM test_table GROUP BY col2, 
> col3;
> ==> LocalFetch correct result <==
> 123456 15 2 
> ==> Wrong result with Tez/Llap <==
> set hive.fetch.task.conversion=none;
> 123456 15 2 
> 123456 15 1 
> ==> Correct result with Tez/Llap disabling map aggregation <==
> set hive.map.aggr=false;
> 123456 15 2 
> {code}



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Updated] (HIVE-23779) BasicStatsTask Info is not getting printed in beeline console

2021-03-15 Thread Naresh P R (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-23779?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Naresh P R updated HIVE-23779:
--
Fix Version/s: 4.0.0
   Resolution: Fixed
   Status: Resolved  (was: Patch Available)

> BasicStatsTask Info is not getting printed in beeline console
> -
>
> Key: HIVE-23779
> URL: https://issues.apache.org/jira/browse/HIVE-23779
> Project: Hive
>  Issue Type: Bug
>Reporter: Naresh P R
>Assignee: Naresh P R
>Priority: Major
>  Labels: pull-request-available
> Fix For: 4.0.0
>
>  Time Spent: 1h 50m
>  Remaining Estimate: 0h
>
> After HIVE-16061, partition basic stats are not getting printed in beeline 
> console.
> {code:java}
> INFO : Partition {dt=2020-06-29} stats: [numFiles=21, numRows=22, 
> totalSize=14607, rawDataSize=0]{code}



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Commented] (HIVE-23779) BasicStatsTask Info is not getting printed in beeline console

2021-03-15 Thread Naresh P R (Jira)


[ 
https://issues.apache.org/jira/browse/HIVE-23779?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=17302014#comment-17302014
 ] 

Naresh P R commented on HIVE-23779:
---

Thanks for the review & merge [~mgergely]

> BasicStatsTask Info is not getting printed in beeline console
> -
>
> Key: HIVE-23779
> URL: https://issues.apache.org/jira/browse/HIVE-23779
> Project: Hive
>  Issue Type: Bug
>Reporter: Naresh P R
>Assignee: Naresh P R
>Priority: Major
>  Labels: pull-request-available
>  Time Spent: 1h 50m
>  Remaining Estimate: 0h
>
> After HIVE-16061, partition basic stats are not getting printed in beeline 
> console.
> {code:java}
> INFO : Partition {dt=2020-06-29} stats: [numFiles=21, numRows=22, 
> totalSize=14607, rawDataSize=0]{code}



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Updated] (HIVE-24882) Compaction task reattempt fails with FileAlreadyExistsException for DeleteEventWriter

2021-03-12 Thread Naresh P R (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-24882?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Naresh P R updated HIVE-24882:
--
Status: Patch Available  (was: In Progress)

> Compaction task reattempt fails with FileAlreadyExistsException for 
> DeleteEventWriter
> -
>
> Key: HIVE-24882
> URL: https://issues.apache.org/jira/browse/HIVE-24882
> Project: Hive
>  Issue Type: Bug
>Reporter: Naresh P R
>Assignee: Naresh P R
>Priority: Major
>  Labels: pull-request-available
>  Time Spent: 10m
>  Remaining Estimate: 0h
>
> If first attempt of compaction task is pre-empted by yarn or execution failed 
> because of environmental issues, re-attempted tasks will fail with 
> FileAlreadyExistsException
> {noformat}
> Error: org.apache.hadoop.fs.FileAlreadyExistsException: 
> /warehouse/tablespace/managed/hive/test.db/acid_table/dept=cse/_tmp_xxx/delete_delta_001_010/bucket_0
> at 
> org.apache.hadoop.hdfs.server.namenode.FSDirWriteFileOp.startFile(FSDirWriteFileOp.java:380)
> at 
> org.apache.hadoop.hdfs.server.namenode.FSNamesystem.startFileInt(FSNamesystem.java:2453)
> at 
> org.apache.hadoop.hdfs.server.namenode.FSNamesystem.startFile(FSNamesystem.java:2351)
>  
> at 
> org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.create(NameNodeRpcServer.java:774)
>  
> at 
> org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.create(ClientNamenodeProtocolServerSideTranslatorPB.java:462)
>  
> at 
> org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$2.callBlockingMethod(ClientNamenodeProtocolProtos.java)
>  
> at 
> org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:524)
>  
> at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:1025) 
> at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:876) 
> at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:822) 
> at java.security.AccessController.doPrivileged(Native Method) 
> at javax.security.auth.Subject.doAs(Subject.java:422) 
> at 
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1730)
>  
> at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2682) 
> at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method) 
> at 
> sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
>  
> at 
> sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
>  
> at java.lang.reflect.Constructor.newInstance(Constructor.java:423) 
> at 
> org.apache.hadoop.ipc.RemoteException.instantiateException(RemoteException.java:121)
>  
> at 
> org.apache.hadoop.ipc.RemoteException.unwrapRemoteException(RemoteException.java:88)
>  
> at 
> org.apache.hadoop.hdfs.DFSOutputStream.newStreamForCreate(DFSOutputStream.java:278)
>  
> at org.apache.hadoop.hdfs.DFSClient.create(DFSClient.java:1211) 
> at org.apache.hadoop.hdfs.DFSClient.create(DFSClient.java:1190) 
> at org.apache.hadoop.hdfs.DFSClient.create(DFSClient.java:1128) 
> at 
> org.apache.hadoop.hdfs.DistributedFileSystem$8.doCall(DistributedFileSystem.java:531)
>  
> at 
> org.apache.hadoop.hdfs.DistributedFileSystem$8.doCall(DistributedFileSystem.java:528)
>  
> at 
> org.apache.hadoop.fs.FileSystemLinkResolver.resolve(FileSystemLinkResolver.java:81)
>  
> at 
> org.apache.hadoop.hdfs.DistributedFileSystem.create(DistributedFileSystem.java:542)
>  
> at 
> org.apache.hadoop.hdfs.DistributedFileSystem.create(DistributedFileSystem.java:469)
>  
> at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1118) 
> at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1098) 
> at org.apache.orc.impl.PhysicalFsWriter.<init>(PhysicalFsWriter.java:95) 
> at org.apache.orc.impl.WriterImpl.<init>(WriterImpl.java:177) 
> at org.apache.hadoop.hive.ql.io.orc.WriterImpl.<init>(WriterImpl.java:94) 
> at org.apache.hadoop.hive.ql.io.orc.OrcFile.createWriter(OrcFile.java:378) 
> at 
> org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat.getRawRecordWriter(OrcOutputFormat.java:299)
>  
> at 
> org.apache.hadoop.hive.ql.txn.compactor.CompactorMR$CompactorMap.getDeleteEventWriter(CompactorMR.java:1084)
>  
> at 
> org.apache.hadoop.hive.ql.txn.compactor.CompactorMR$CompactorMap.map(CompactorMR.java:995)
>  
> at 
> org.apache.hadoop.hive.ql.txn.compactor.CompactorMR$CompactorMap.map(CompactorMR.java:958){noformat}



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Work started] (HIVE-24882) Compaction task reattempt fails with FileAlreadyExistsException for DeleteEventWriter

2021-03-12 Thread Naresh P R (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-24882?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Work on HIVE-24882 started by Naresh P R.
-
> Compaction task reattempt fails with FileAlreadyExistsException for 
> DeleteEventWriter
> -
>
> Key: HIVE-24882
> URL: https://issues.apache.org/jira/browse/HIVE-24882
> Project: Hive
>  Issue Type: Bug
>Reporter: Naresh P R
>Assignee: Naresh P R
>Priority: Major
>  Labels: pull-request-available
>  Time Spent: 10m
>  Remaining Estimate: 0h
>
> If first attempt of compaction task is pre-empted by yarn or execution failed 
> because of environmental issues, re-attempted tasks will fail with 
> FileAlreadyExistsException
> {noformat}
> Error: org.apache.hadoop.fs.FileAlreadyExistsException: 
> /warehouse/tablespace/managed/hive/test.db/acid_table/dept=cse/_tmp_xxx/delete_delta_001_010/bucket_0
> at 
> org.apache.hadoop.hdfs.server.namenode.FSDirWriteFileOp.startFile(FSDirWriteFileOp.java:380)
> at 
> org.apache.hadoop.hdfs.server.namenode.FSNamesystem.startFileInt(FSNamesystem.java:2453)
> at 
> org.apache.hadoop.hdfs.server.namenode.FSNamesystem.startFile(FSNamesystem.java:2351)
>  
> at 
> org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.create(NameNodeRpcServer.java:774)
>  
> at 
> org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.create(ClientNamenodeProtocolServerSideTranslatorPB.java:462)
>  
> at 
> org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$2.callBlockingMethod(ClientNamenodeProtocolProtos.java)
>  
> at 
> org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:524)
>  
> at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:1025) 
> at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:876) 
> at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:822) 
> at java.security.AccessController.doPrivileged(Native Method) 
> at javax.security.auth.Subject.doAs(Subject.java:422) 
> at 
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1730)
>  
> at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2682) 
> at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method) 
> at 
> sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
>  
> at 
> sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
>  
> at java.lang.reflect.Constructor.newInstance(Constructor.java:423) 
> at 
> org.apache.hadoop.ipc.RemoteException.instantiateException(RemoteException.java:121)
>  
> at 
> org.apache.hadoop.ipc.RemoteException.unwrapRemoteException(RemoteException.java:88)
>  
> at 
> org.apache.hadoop.hdfs.DFSOutputStream.newStreamForCreate(DFSOutputStream.java:278)
>  
> at org.apache.hadoop.hdfs.DFSClient.create(DFSClient.java:1211) 
> at org.apache.hadoop.hdfs.DFSClient.create(DFSClient.java:1190) 
> at org.apache.hadoop.hdfs.DFSClient.create(DFSClient.java:1128) 
> at 
> org.apache.hadoop.hdfs.DistributedFileSystem$8.doCall(DistributedFileSystem.java:531)
>  
> at 
> org.apache.hadoop.hdfs.DistributedFileSystem$8.doCall(DistributedFileSystem.java:528)
>  
> at 
> org.apache.hadoop.fs.FileSystemLinkResolver.resolve(FileSystemLinkResolver.java:81)
>  
> at 
> org.apache.hadoop.hdfs.DistributedFileSystem.create(DistributedFileSystem.java:542)
>  
> at 
> org.apache.hadoop.hdfs.DistributedFileSystem.create(DistributedFileSystem.java:469)
>  
> at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1118) 
> at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1098) 
> at org.apache.orc.impl.PhysicalFsWriter.<init>(PhysicalFsWriter.java:95) 
> at org.apache.orc.impl.WriterImpl.<init>(WriterImpl.java:177) 
> at org.apache.hadoop.hive.ql.io.orc.WriterImpl.<init>(WriterImpl.java:94) 
> at org.apache.hadoop.hive.ql.io.orc.OrcFile.createWriter(OrcFile.java:378) 
> at 
> org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat.getRawRecordWriter(OrcOutputFormat.java:299)
>  
> at 
> org.apache.hadoop.hive.ql.txn.compactor.CompactorMR$CompactorMap.getDeleteEventWriter(CompactorMR.java:1084)
>  
> at 
> org.apache.hadoop.hive.ql.txn.compactor.CompactorMR$CompactorMap.map(CompactorMR.java:995)
>  
> at 
> org.apache.hadoop.hive.ql.txn.compactor.CompactorMR$CompactorMap.map(CompactorMR.java:958){noformat}



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Updated] (HIVE-24882) Compaction task reattempt fails with FileAlreadyExistsException for DeleteEventWriter

2021-03-12 Thread Naresh P R (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-24882?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Naresh P R updated HIVE-24882:
--
Description: 
If the first attempt of a compaction task is pre-empted by YARN, or its execution fails 
because of environmental issues, the re-attempted task fails with 
FileAlreadyExistsException
{noformat}
Error: org.apache.hadoop.fs.FileAlreadyExistsException: 
/warehouse/tablespace/managed/hive/test.db/acid_table/dept=cse/_tmp_xxx/delete_delta_001_010/bucket_0
at 
org.apache.hadoop.hdfs.server.namenode.FSDirWriteFileOp.startFile(FSDirWriteFileOp.java:380)
at 
org.apache.hadoop.hdfs.server.namenode.FSNamesystem.startFileInt(FSNamesystem.java:2453)
at 
org.apache.hadoop.hdfs.server.namenode.FSNamesystem.startFile(FSNamesystem.java:2351)
 
at 
org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.create(NameNodeRpcServer.java:774)
 
at 
org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.create(ClientNamenodeProtocolServerSideTranslatorPB.java:462)
 
at 
org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$2.callBlockingMethod(ClientNamenodeProtocolProtos.java)
 
at 
org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:524)
 
at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:1025) 
at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:876) 
at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:822) 
at java.security.AccessController.doPrivileged(Native Method) 
at javax.security.auth.Subject.doAs(Subject.java:422) 
at 
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1730)
 
at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2682) 
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method) 
at 
sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
 
at 
sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
 
at java.lang.reflect.Constructor.newInstance(Constructor.java:423) 
at 
org.apache.hadoop.ipc.RemoteException.instantiateException(RemoteException.java:121)
 
at 
org.apache.hadoop.ipc.RemoteException.unwrapRemoteException(RemoteException.java:88)
 
at 
org.apache.hadoop.hdfs.DFSOutputStream.newStreamForCreate(DFSOutputStream.java:278)
 
at org.apache.hadoop.hdfs.DFSClient.create(DFSClient.java:1211) 
at org.apache.hadoop.hdfs.DFSClient.create(DFSClient.java:1190) 
at org.apache.hadoop.hdfs.DFSClient.create(DFSClient.java:1128) 
at 
org.apache.hadoop.hdfs.DistributedFileSystem$8.doCall(DistributedFileSystem.java:531)
 
at 
org.apache.hadoop.hdfs.DistributedFileSystem$8.doCall(DistributedFileSystem.java:528)
 
at 
org.apache.hadoop.fs.FileSystemLinkResolver.resolve(FileSystemLinkResolver.java:81)
 
at 
org.apache.hadoop.hdfs.DistributedFileSystem.create(DistributedFileSystem.java:542)
 
at 
org.apache.hadoop.hdfs.DistributedFileSystem.create(DistributedFileSystem.java:469)
 
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1118) 
at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1098) 
at org.apache.orc.impl.PhysicalFsWriter.<init>(PhysicalFsWriter.java:95) 
at org.apache.orc.impl.WriterImpl.<init>(WriterImpl.java:177) 
at org.apache.hadoop.hive.ql.io.orc.WriterImpl.<init>(WriterImpl.java:94) 
at org.apache.hadoop.hive.ql.io.orc.OrcFile.createWriter(OrcFile.java:378) 
at 
org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat.getRawRecordWriter(OrcOutputFormat.java:299)
 
at 
org.apache.hadoop.hive.ql.txn.compactor.CompactorMR$CompactorMap.getDeleteEventWriter(CompactorMR.java:1084)
 
at 
org.apache.hadoop.hive.ql.txn.compactor.CompactorMR$CompactorMap.map(CompactorMR.java:995)
 
at 
org.apache.hadoop.hive.ql.txn.compactor.CompactorMR$CompactorMap.map(CompactorMR.java:958){noformat}

  was:
If first attempt of compaction task is pre-empted by yarn or execution failed 
because of environmental issues, re-attempted tasks will fail with 
FileAlreadyExistsException
{noformat}
Error: org.apache.hadoop.fs.FileAlreadyExistsException: 
/warehouse/tablespace/managed/hive/test.db/acid_table/dept=cse/_tmp_xxx/delete_delta_001_010/bucket_0
at 
org.apache.hadoop.hdfs.server.namenode.FSDirWriteFileOp.startFile(FSDirWriteFileOp.java:380)
at 
org.apache.hadoop.hdfs.server.namenode.FSNamesystem.startFileInt(FSNamesystem.java:2453)
at 
org.apache.hadoop.hdfs.server.namenode.FSNamesystem.startFile(FSNamesystem.java:2351)
 
at 
org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.create(NameNodeRpcServer.java:774)
 
at 
org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.create(ClientNamenodeProtocolServerSideTranslatorPB.java:462)
 
at 
org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$2.callBlockingMethod(ClientNamenodeProtocolProtos.java)
 
at 

[jira] [Assigned] (HIVE-24882) Compaction task reattempt fails with FileAlreadyExistsException for DeleteEventWriter

2021-03-12 Thread Naresh P R (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-24882?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Naresh P R reassigned HIVE-24882:
-


> Compaction task reattempt fails with FileAlreadyExistsException for 
> DeleteEventWriter
> -
>
> Key: HIVE-24882
> URL: https://issues.apache.org/jira/browse/HIVE-24882
> Project: Hive
>  Issue Type: Bug
>Reporter: Naresh P R
>Assignee: Naresh P R
>Priority: Major
>
> If first attempt of compaction task is pre-empted by yarn or execution failed 
> because of environmental issues, re-attempted tasks will fail with 
> FileAlreadyExistsException
> {noformat}
> Error: org.apache.hadoop.fs.FileAlreadyExistsException: 
> /warehouse/tablespace/managed/hive/test.db/acid_table/dept=cse/_tmp_xxx/delete_delta_001_010/bucket_0
> at 
> org.apache.hadoop.hdfs.server.namenode.FSDirWriteFileOp.startFile(FSDirWriteFileOp.java:380)
> at 
> org.apache.hadoop.hdfs.server.namenode.FSNamesystem.startFileInt(FSNamesystem.java:2453)
> at 
> org.apache.hadoop.hdfs.server.namenode.FSNamesystem.startFile(FSNamesystem.java:2351)
>  
> at 
> org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.create(NameNodeRpcServer.java:774)
>  
> at 
> org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.create(ClientNamenodeProtocolServerSideTranslatorPB.java:462)
>  
> at 
> org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$2.callBlockingMethod(ClientNamenodeProtocolProtos.java)
>  
> at 
> org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:524)
>  
> at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:1025) 
> at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:876) 
> at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:822) 
> at java.security.AccessController.doPrivileged(Native Method) 
> at javax.security.auth.Subject.doAs(Subject.java:422) 
> at 
> org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1730)
>  
> at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2682) 
> at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method) 
> at 
> sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
>  
> at 
> sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
>  
> at java.lang.reflect.Constructor.newInstance(Constructor.java:423) 
> at 
> org.apache.hadoop.ipc.RemoteException.instantiateException(RemoteException.java:121)
>  
> at 
> org.apache.hadoop.ipc.RemoteException.unwrapRemoteException(RemoteException.java:88)
>  at 
> org.apache.hadoop.hdfs.DFSOutputStream.newStreamForCreate(DFSOutputStream.java:278)
>  at org.apache.hadoop.hdfs.DFSClient.create(DFSClient.java:1211) at 
> org.apache.hadoop.hdfs.DFSClient.create(DFSClient.java:1190) at 
> org.apache.hadoop.hdfs.DFSClient.create(DFSClient.java:1128) at 
> org.apache.hadoop.hdfs.DistributedFileSystem$8.doCall(DistributedFileSystem.java:531)
>  at 
> org.apache.hadoop.hdfs.DistributedFileSystem$8.doCall(DistributedFileSystem.java:528)
>  at 
> org.apache.hadoop.fs.FileSystemLinkResolver.resolve(FileSystemLinkResolver.java:81)
>  at 
> org.apache.hadoop.hdfs.DistributedFileSystem.create(DistributedFileSystem.java:542)
>  
> at 
> org.apache.hadoop.hdfs.DistributedFileSystem.create(DistributedFileSystem.java:469)
>  
> at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1118) 
> at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:1098) 
> at org.apache.orc.impl.PhysicalFsWriter.<init>(PhysicalFsWriter.java:95) 
> at org.apache.orc.impl.WriterImpl.<init>(WriterImpl.java:177) 
> at org.apache.hadoop.hive.ql.io.orc.WriterImpl.<init>(WriterImpl.java:94) 
> at org.apache.hadoop.hive.ql.io.orc.OrcFile.createWriter(OrcFile.java:378) 
> at 
> org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat.getRawRecordWriter(OrcOutputFormat.java:299)
>  
> at 
> org.apache.hadoop.hive.ql.txn.compactor.CompactorMR$CompactorMap.getDeleteEventWriter(CompactorMR.java:1084)
>  
> at 
> org.apache.hadoop.hive.ql.txn.compactor.CompactorMR$CompactorMap.map(CompactorMR.java:995)
>  
> at 
> org.apache.hadoop.hive.ql.txn.compactor.CompactorMR$CompactorMap.map(CompactorMR.java:958){noformat}



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Updated] (HIVE-24877) Support X'xxxx' syntax for hexadecimal values like spark & mysql

2021-03-11 Thread Naresh P R (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-24877?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Naresh P R updated HIVE-24877:
--
Description: 
Hive is currently not supporting following syntax

select x'abc';
{code:java}
org.apache.hadoop.hive.ql.parse.ParseException: line 2:8 cannot recognize input 
near 'x' ''abc'' '' in selection target
  at org.apache.hadoop.hive.ql.parse.ParseDriver.parse(ParseDriver.java:125)
  at org.apache.hadoop.hive.ql.parse.ParseUtils.parse(ParseUtils.java:93)
  at org.apache.hadoop.hive.ql.parse.ParseUtils.parse(ParseUtils.java:85)
  at org.apache.hadoop.hive.ql.Compiler.parse(Compiler.java:169)
  at org.apache.hadoop.hive.ql.Compiler.compile(Compiler.java:102)
  at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:492)
  at org.apache.hadoop.hive.ql.Driver.compileInternal(Driver.java:445){code}
Though we have the same functionality via the hex/unhex built-in UDFs, it's 
better to have {{X'value'}} and {{x'value'}} syntax support for Hive.

[https://spark.apache.org/docs/latest/sql-ref-literals.html#binary-literal]

[https://dev.mysql.com/doc/refman/8.0/en/string-functions.html#function_hex]

[https://mariadb.com/kb/en/hexadecimal-literals/]

  was:
Hive is currently not supporting following syntax

select x'abc';
{code:java}
org.apache.hadoop.hive.ql.parse.ParseException: line 2:8 cannot recognize input 
near 'x' ''abc'' '' in selection 
targetorg.apache.hadoop.hive.ql.parse.ParseException: line 2:8 cannot recognize 
input near 'x' ''abc'' '' in selection target at 
org.apache.hadoop.hive.ql.parse.ParseDriver.parse(ParseDriver.java:125) at 
org.apache.hadoop.hive.ql.parse.ParseUtils.parse(ParseUtils.java:93) at 
org.apache.hadoop.hive.ql.parse.ParseUtils.parse(ParseUtils.java:85) at 
org.apache.hadoop.hive.ql.Compiler.parse(Compiler.java:169) at 
org.apache.hadoop.hive.ql.Compiler.compile(Compiler.java:102) at 
org.apache.hadoop.hive.ql.Driver.compile(Driver.java:492) at 
org.apache.hadoop.hive.ql.Driver.compileInternal(Driver.java:445){code}
Though we have same via hex/unhex built-in UDF's, it's better to have 
{{X'value'}} and x'{{value'}} syntax support for Hive.

[https://spark.apache.org/docs/latest/sql-ref-literals.html#binary-literal]

[https://dev.mysql.com/doc/refman/8.0/en/string-functions.html#function_hex]

[https://mariadb.com/kb/en/hexadecimal-literals/]


> Support X'' syntax for hexadecimal values like spark & mysql
> 
>
> Key: HIVE-24877
> URL: https://issues.apache.org/jira/browse/HIVE-24877
> Project: Hive
>  Issue Type: New Feature
>Reporter: Naresh P R
>Priority: Minor
>
> Hive is currently not supporting following syntax
> select x'abc';
> {code:java}
> org.apache.hadoop.hive.ql.parse.ParseException: line 2:8 cannot recognize 
> input near 'x' ''abc'' '' in selection target
>   at org.apache.hadoop.hive.ql.parse.ParseDriver.parse(ParseDriver.java:125)
>   at org.apache.hadoop.hive.ql.parse.ParseUtils.parse(ParseUtils.java:93)
>   at org.apache.hadoop.hive.ql.parse.ParseUtils.parse(ParseUtils.java:85)
>   at org.apache.hadoop.hive.ql.Compiler.parse(Compiler.java:169)
>   at org.apache.hadoop.hive.ql.Compiler.compile(Compiler.java:102)
>   at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:492)
>   at org.apache.hadoop.hive.ql.Driver.compileInternal(Driver.java:445){code}
> Though we have the same functionality via the hex/unhex built-in UDFs, it's 
> better to have {{X'value'}} and {{x'value'}} syntax support for Hive.
> [https://spark.apache.org/docs/latest/sql-ref-literals.html#binary-literal]
> [https://dev.mysql.com/doc/refman/8.0/en/string-functions.html#function_hex]
> [https://mariadb.com/kb/en/hexadecimal-literals/]



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Updated] (HIVE-24877) Support X'xxxx' syntax for hexadecimal values like spark & mysql

2021-03-11 Thread Naresh P R (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-24877?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Naresh P R updated HIVE-24877:
--
Description: 
Hive is currently not supporting following syntax

select x'abc';
{code:java}
org.apache.hadoop.hive.ql.parse.ParseException: line 2:8 cannot recognize input 
near 'x' ''abc'' '' in selection 
targetorg.apache.hadoop.hive.ql.parse.ParseException: line 2:8 cannot recognize 
input near 'x' ''abc'' '' in selection target at 
org.apache.hadoop.hive.ql.parse.ParseDriver.parse(ParseDriver.java:125) at 
org.apache.hadoop.hive.ql.parse.ParseUtils.parse(ParseUtils.java:93) at 
org.apache.hadoop.hive.ql.parse.ParseUtils.parse(ParseUtils.java:85) at 
org.apache.hadoop.hive.ql.Compiler.parse(Compiler.java:169) at 
org.apache.hadoop.hive.ql.Compiler.compile(Compiler.java:102) at 
org.apache.hadoop.hive.ql.Driver.compile(Driver.java:492) at 
org.apache.hadoop.hive.ql.Driver.compileInternal(Driver.java:445){code}
Though we have the same functionality via the hex/unhex built-in UDFs, it's 
better to have {{X'value'}} and {{x'value'}} syntax support for Hive.

[https://spark.apache.org/docs/latest/sql-ref-literals.html#binary-literal]

[https://dev.mysql.com/doc/refman/8.0/en/string-functions.html#function_hex]

[https://mariadb.com/kb/en/hexadecimal-literals/]

  was:
Hive is currently not supporting following syntax

select x'abc';
{code:java}
org.apache.hadoop.hive.ql.parse.ParseException: line 2:8 cannot recognize input 
near 'x' ''abc'' '' in selection target 
org.apache.hadoop.hive.ql.parse.ParseException: line 2:8 cannot recognize input 
near 'x' ''31FECC'' '' in selection target at 
org.apache.hadoop.hive.ql.parse.ParseDriver.parse(ParseDriver.java:125) at 
org.apache.hadoop.hive.ql.parse.ParseUtils.parse(ParseUtils.java:93) at 
org.apache.hadoop.hive.ql.parse.ParseUtils.parse(ParseUtils.java:85) at 
org.apache.hadoop.hive.ql.Compiler.parse(Compiler.java:169) at 
org.apache.hadoop.hive.ql.Compiler.compile(Compiler.java:102) at 
org.apache.hadoop.hive.ql.Driver.compile(Driver.java:492) at 
org.apache.hadoop.hive.ql.Driver.compileInternal(Driver.java:445){code}
Though we have same via hex/unhex built-in UDF's, it's better to have 
{{X'value'}} and x'{{value'}} syntax support for Hive.

[https://spark.apache.org/docs/latest/sql-ref-literals.html#binary-literal]

[https://dev.mysql.com/doc/refman/8.0/en/string-functions.html#function_hex]

https://mariadb.com/kb/en/hexadecimal-literals/


> Support X'' syntax for hexadecimal values like spark & mysql
> 
>
> Key: HIVE-24877
> URL: https://issues.apache.org/jira/browse/HIVE-24877
> Project: Hive
>  Issue Type: New Feature
>Reporter: Naresh P R
>Priority: Minor
>
> Hive is currently not supporting following syntax
> select x'abc';
> {code:java}
> org.apache.hadoop.hive.ql.parse.ParseException: line 2:8 cannot recognize 
> input near 'x' ''abc'' '' in selection 
> targetorg.apache.hadoop.hive.ql.parse.ParseException: line 2:8 cannot 
> recognize input near 'x' ''abc'' '' in selection target at 
> org.apache.hadoop.hive.ql.parse.ParseDriver.parse(ParseDriver.java:125) at 
> org.apache.hadoop.hive.ql.parse.ParseUtils.parse(ParseUtils.java:93) at 
> org.apache.hadoop.hive.ql.parse.ParseUtils.parse(ParseUtils.java:85) at 
> org.apache.hadoop.hive.ql.Compiler.parse(Compiler.java:169) at 
> org.apache.hadoop.hive.ql.Compiler.compile(Compiler.java:102) at 
> org.apache.hadoop.hive.ql.Driver.compile(Driver.java:492) at 
> org.apache.hadoop.hive.ql.Driver.compileInternal(Driver.java:445){code}
> Though we have the same functionality via the hex/unhex built-in UDFs, it's 
> better to have {{X'value'}} and {{x'value'}} syntax support for Hive.
> [https://spark.apache.org/docs/latest/sql-ref-literals.html#binary-literal]
> [https://dev.mysql.com/doc/refman/8.0/en/string-functions.html#function_hex]
> [https://mariadb.com/kb/en/hexadecimal-literals/]



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Commented] (HIVE-24757) Add UDF To Obtain HS2 Host

2021-02-09 Thread Naresh P R (Jira)


[ 
https://issues.apache.org/jira/browse/HIVE-24757?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=17282068#comment-17282068
 ] 

Naresh P R commented on HIVE-24757:
---

I agree a UDF would be better; however, can you check whether "set 
hive.server2.thrift.bind.host;" helps?

> Add UDF To Obtain HS2 Host
> --
>
> Key: HIVE-24757
> URL: https://issues.apache.org/jira/browse/HIVE-24757
> Project: Hive
>  Issue Type: New Feature
>Reporter: David Mollitor
>Priority: Minor
>
> It can be confusing to troubleshoot an issue in Hive because it's not very 
> easy to determine which instance a connection is made to (in multi-HS2 
> environment).
> Please add a UDF that displays the hostname of the currently connected HS2 
> instance.



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Updated] (HIVE-24716) jQuery file symlink is replaced by physical file which requires changes on both the places

2021-02-01 Thread Naresh P R (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-24716?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Naresh P R updated HIVE-24716:
--
Description: 
HIVE-22066 replaced symlink

llap-server/src/main/resources/hive-webapps/llap/js/jquery.min.js -> 
service/src/resources/hive-webapps/static/js/jquery.min.js

with a physical file; whenever the jQuery version gets upgraded, the same 
changes need to be made in both places

  was:
HIVE-22099 replaced symlink

llap-server/src/main/resources/hive-webapps/llap/js/jquery.min.js -> 
service/src/resources/hive-webapps/static/js/jquery.min.js

with a physical file, whenever jQuery version gets upgraded, same changes needs 
to be done on both places


> jQuery file symlink is replaced by physical file which requires changes on 
> both the places
> --
>
> Key: HIVE-24716
> URL: https://issues.apache.org/jira/browse/HIVE-24716
> Project: Hive
>  Issue Type: Bug
>Reporter: Naresh P R
>Priority: Major
>
> HIVE-22066 replaced symlink
> llap-server/src/main/resources/hive-webapps/llap/js/jquery.min.js -> 
> service/src/resources/hive-webapps/static/js/jquery.min.js
> with a physical file; whenever the jQuery version gets upgraded, the same 
> changes need to be made in both places



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Commented] (HIVE-24628) Decimal values are displayed as scientific notation in beeline

2021-01-14 Thread Naresh P R (Jira)


[ 
https://issues.apache.org/jira/browse/HIVE-24628?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=17265046#comment-17265046
 ] 

Naresh P R commented on HIVE-24628:
---

Thanks for the review and merge [~mgergely]

> Decimal values are displayed as scientific notation in beeline
> --
>
> Key: HIVE-24628
> URL: https://issues.apache.org/jira/browse/HIVE-24628
> Project: Hive
>  Issue Type: Bug
>  Components: Beeline
>Reporter: Naresh P R
>Assignee: Naresh P R
>Priority: Major
>  Labels: pull-request-available
>  Time Spent: 10m
>  Remaining Estimate: 0h
>
> As we are using BigDecimal.toString(), it returns scientific notation instead 
> of the original text, which confuses customers. It should be changed to 
> toPlainString() here:
> [https://github.com/apache/hive/blob/master/beeline/src/java/org/apache/hive/beeline/Rows.java#L165]
> Repro steps:
>  
> {code:java}
> beeline> select cast(0 as decimal(20,10));
> //output
> 0E-10 
> {code}
>  



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Resolved] (HIVE-24628) Decimal values are displayed as scientific notation in beeline

2021-01-14 Thread Naresh P R (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-24628?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Naresh P R resolved HIVE-24628.
---
Fix Version/s: 4.0.0
   Resolution: Fixed

> Decimal values are displayed as scientific notation in beeline
> --
>
> Key: HIVE-24628
> URL: https://issues.apache.org/jira/browse/HIVE-24628
> Project: Hive
>  Issue Type: Bug
>  Components: Beeline
>Reporter: Naresh P R
>Assignee: Naresh P R
>Priority: Major
>  Labels: pull-request-available
> Fix For: 4.0.0
>
>  Time Spent: 10m
>  Remaining Estimate: 0h
>
> As we are using BigDecimal.toString(), it returns scientific notation instead 
> of the original text, which confuses customers. It should be changed to 
> toPlainString() here:
> [https://github.com/apache/hive/blob/master/beeline/src/java/org/apache/hive/beeline/Rows.java#L165]
> Repro steps:
>  
> {code:java}
> beeline> select cast(0 as decimal(20,10));
> //output
> 0E-10 
> {code}
>  



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Updated] (HIVE-24634) Create table if not exists should validate whether table exists before doAuth()

2021-01-13 Thread Naresh P R (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-24634?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Naresh P R updated HIVE-24634:
--
Description: 
In a Hive + Ranger cluster, for "create table if not exists", hive-ranger 
validates privileges over all files in the table location even though the 
table already exists.

The table-existence check should be performed before doAuthorization in compile.
{code:java}
 at 
org.apache.hadoop.hive.common.FileUtils.isActionPermittedForFileHierarchy(FileUtils.java:452)
 
 at 
org.apache.ranger.authorization.hive.authorizer.RangerHiveAuthorizer.isURIAccessAllowed(RangerHiveAuthorizer.java:1428)
 at 
org.apache.ranger.authorization.hive.authorizer.RangerHiveAuthorizer.checkPrivileges(RangerHiveAuthorizer.java:291)
 at org.apache.hadoop.hive.ql.Driver.doAuthorizationV2(Driver.java:1337)
 at org.apache.hadoop.hive.ql.Driver.doAuthorization(Driver.java:1101)
 at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:710){code}

  was:
In Hive + Ranger cluster, Create table if not exist hive-ranger would validate 
privileges over complete files in table location even thought table already 
exist.

Table exist check should be validated before doAuthorization in compile.
{code:java}
at 
org.apache.ranger.authorization.hive.authorizer.RangerHiveAuthorizer.checkPrivileges(RangerHiveAuthorizer.java:291)
 at org.apache.hadoop.hive.ql.Driver.doAuthorizationV2(Driver.java:1337)
 at org.apache.hadoop.hive.ql.Driver.doAuthorization(Driver.java:1101)
 at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:710){code}


> Create table if not exists should validate whether table exists before 
> doAuth()
> ---
>
> Key: HIVE-24634
> URL: https://issues.apache.org/jira/browse/HIVE-24634
> Project: Hive
>  Issue Type: Bug
>Reporter: Naresh P R
>Priority: Major
>
> In a Hive + Ranger cluster, for "create table if not exists", hive-ranger 
> validates privileges over all files in the table location even though the 
> table already exists.
> The table-existence check should be performed before doAuthorization in compile.
> {code:java}
>  at 
> org.apache.hadoop.hive.common.FileUtils.isActionPermittedForFileHierarchy(FileUtils.java:452)
>  
>  at 
> org.apache.ranger.authorization.hive.authorizer.RangerHiveAuthorizer.isURIAccessAllowed(RangerHiveAuthorizer.java:1428)
>  at 
> org.apache.ranger.authorization.hive.authorizer.RangerHiveAuthorizer.checkPrivileges(RangerHiveAuthorizer.java:291)
>  at org.apache.hadoop.hive.ql.Driver.doAuthorizationV2(Driver.java:1337)
>  at org.apache.hadoop.hive.ql.Driver.doAuthorization(Driver.java:1101)
>  at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:710){code}



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Updated] (HIVE-24634) Create table if not exists should validate whether table exists before doAuth()

2021-01-13 Thread Naresh P R (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-24634?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Naresh P R updated HIVE-24634:
--
Description: 
In a Hive + Ranger cluster, for "create table if not exists", hive-ranger 
validates privileges over all files in the table location even though the 
table already exists.

The table-existence check should be performed before doAuthorization in compile.
{code:java}
at 
org.apache.ranger.authorization.hive.authorizer.RangerHiveAuthorizer.checkPrivileges(RangerHiveAuthorizer.java:291)
 at org.apache.hadoop.hive.ql.Driver.doAuthorizationV2(Driver.java:1337)
 at org.apache.hadoop.hive.ql.Driver.doAuthorization(Driver.java:1101)
 at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:710){code}

  was:
In Hive + Ranger cluster, Create table if not exist hive-ranger would validate 
privileges over complete files in table location even thought table already 
exist.

Table exist check should be validated before doAuthorization in compile.
at 
org.apache.ranger.authorization.hive.authorizer.RangerHiveAuthorizer.checkPrivileges(RangerHiveAuthorizer.java:291)
at org.apache.hadoop.hive.ql.Driver.doAuthorizationV2(Driver.java:1337)
at org.apache.hadoop.hive.ql.Driver.doAuthorization(Driver.java:1101)
at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:710)


> Create table if not exists should validate whether table exists before 
> doAuth()
> ---
>
> Key: HIVE-24634
> URL: https://issues.apache.org/jira/browse/HIVE-24634
> Project: Hive
>  Issue Type: Bug
>Reporter: Naresh P R
>Priority: Major
>
> In a Hive + Ranger cluster, for "create table if not exists", hive-ranger 
> validates privileges over all files in the table location even though the 
> table already exists.
> The table-existence check should be performed before doAuthorization in compile.
> {code:java}
> at 
> org.apache.ranger.authorization.hive.authorizer.RangerHiveAuthorizer.checkPrivileges(RangerHiveAuthorizer.java:291)
>  at org.apache.hadoop.hive.ql.Driver.doAuthorizationV2(Driver.java:1337)
>  at org.apache.hadoop.hive.ql.Driver.doAuthorization(Driver.java:1101)
>  at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:710){code}



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Updated] (HIVE-24523) Vectorized read path for LazySimpleSerde does not honor the SERDEPROPERTIES for timestamp

2021-01-13 Thread Naresh P R (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-24523?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Naresh P R updated HIVE-24523:
--
Fix Version/s: 4.0.0

> Vectorized read path for LazySimpleSerde does not honor the SERDEPROPERTIES 
> for timestamp
> -
>
> Key: HIVE-24523
> URL: https://issues.apache.org/jira/browse/HIVE-24523
> Project: Hive
>  Issue Type: Bug
>  Components: Vectorization
>Affects Versions: 3.2.0, 4.0.0
>Reporter: Rajkumar Singh
>Assignee: Naresh P R
>Priority: Major
>  Labels: pull-request-available
> Fix For: 4.0.0
>
>  Time Spent: 1h
>  Remaining Estimate: 0h
>
> Steps to repro:
> {code:java}
>   create external  table tstable(date_created timestamp)   ROW FORMAT SERDE 
> 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'   WITH SERDEPROPERTIES ( 
>  'timestamp.formats'='MMddHHmmss') stored as textfile;
> cat sampledata 
> 2020120517
> hdfs dfs -put sampledata /warehouse/tablespace/external/hive/tstable
> {code}
> Disable fetch task conversion and run select * from tstable, which produces no 
> results; disabling vectorized deserialization via set 
> hive.vectorized.use.vector.serde.deserialize=false; returns the expected 
> output.
> While parsing the string to a timestamp, 
> https://github.com/apache/hive/blob/master/serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleDeserializeRead.java#L812
>  does not set the DateTimeFormatter, which results in an IllegalArgumentException 
> while parsing the timestamp through TimestampUtils.stringToTimestamp(strValue)



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Updated] (HIVE-24628) Decimal values are displayed as scientific notation in beeline

2021-01-13 Thread Naresh P R (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-24628?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Naresh P R updated HIVE-24628:
--
Component/s: Beeline

> Decimal values are displayed as scientific notation in beeline
> --
>
> Key: HIVE-24628
> URL: https://issues.apache.org/jira/browse/HIVE-24628
> Project: Hive
>  Issue Type: Bug
>  Components: Beeline
>Reporter: Naresh P R
>Assignee: Naresh P R
>Priority: Major
>  Labels: pull-request-available
>
> As we are using BigDecimal.toString(), it returns scientific notation instead 
> of the original text, which confuses customers. It should be changed to 
> toPlainString() here:
> [https://github.com/apache/hive/blob/master/beeline/src/java/org/apache/hive/beeline/Rows.java#L165]
> Repro steps:
>  
> {code:java}
> beeline> select cast(0 as decimal(20,10));
> //output
> 0E-10 
> {code}
>  



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Updated] (HIVE-24628) Decimal values are displayed as scientific notation in beeline

2021-01-12 Thread Naresh P R (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-24628?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Naresh P R updated HIVE-24628:
--
Labels: pull-request-available  (was: )

> Decimal values are displayed as scientific notation in beeline
> --
>
> Key: HIVE-24628
> URL: https://issues.apache.org/jira/browse/HIVE-24628
> Project: Hive
>  Issue Type: Bug
>Reporter: Naresh P R
>Assignee: Naresh P R
>Priority: Major
>  Labels: pull-request-available
>
> As we are using BigDecimal.toString(), it returns scientific notation instead 
> of the original text, which confuses customers. It should be changed to 
> toPlainString() here:
> [https://github.com/apache/hive/blob/master/beeline/src/java/org/apache/hive/beeline/Rows.java#L165]
> Repro steps:
>  
> {code:java}
> beeline> select cast(0 as decimal(20,10));
> //output
> 0E-10 
> {code}
>  



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Updated] (HIVE-24628) Decimal values are displayed as scientific notation in beeline

2021-01-12 Thread Naresh P R (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-24628?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Naresh P R updated HIVE-24628:
--
Summary: Decimal values are displayed as scientific notation in beeline  
(was: Decimal values are displayed as Scientific notation in beeline)

> Decimal values are displayed as scientific notation in beeline
> --
>
> Key: HIVE-24628
> URL: https://issues.apache.org/jira/browse/HIVE-24628
> Project: Hive
>  Issue Type: Bug
>Reporter: Naresh P R
>Assignee: Naresh P R
>Priority: Major
>
> As we are using BigDecimal.toString(), it returns scientific notation instead 
> of the original text, which confuses customers. It should be changed to 
> toPlainString() here:
> [https://github.com/apache/hive/blob/master/beeline/src/java/org/apache/hive/beeline/Rows.java#L165]
> Repro steps:
>  
> {code:java}
> beeline> select cast(0 as decimal(20,10));
> //output
> 0E-10 
> {code}
>  



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Assigned] (HIVE-24628) Decimal values are displayed as Scientific notation in beeline

2021-01-12 Thread Naresh P R (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-24628?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Naresh P R reassigned HIVE-24628:
-


> Decimal values are displayed as Scientific notation in beeline
> --
>
> Key: HIVE-24628
> URL: https://issues.apache.org/jira/browse/HIVE-24628
> Project: Hive
>  Issue Type: Bug
>Reporter: Naresh P R
>Assignee: Naresh P R
>Priority: Major
>
> As we are using BigDecimal.toString(), it returns scientific notation instead 
> of the original text, which confuses customers. It should be changed to 
> toPlainString() here:
> [https://github.com/apache/hive/blob/master/beeline/src/java/org/apache/hive/beeline/Rows.java#L165]
> Repro steps:
>  
> {code:java}
> beeline> select cast(0 as decimal(20,10));
> //output
> 0E-10 
> {code}
>  



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Assigned] (HIVE-24523) Vectorized read path for LazySimpleSerde does not honor the SERDEPROPERTIES for timestamp

2021-01-04 Thread Naresh P R (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-24523?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Naresh P R reassigned HIVE-24523:
-

Assignee: Naresh P R

> Vectorized read path for LazySimpleSerde does not honor the SERDEPROPERTIES 
> for timestamp
> -
>
> Key: HIVE-24523
> URL: https://issues.apache.org/jira/browse/HIVE-24523
> Project: Hive
>  Issue Type: Bug
>  Components: Vectorization
>Affects Versions: 3.2.0, 4.0.0
>Reporter: Rajkumar Singh
>Assignee: Naresh P R
>Priority: Major
>
> Steps to repro:
> {code:java}
>   create external  table tstable(date_created timestamp)   ROW FORMAT SERDE 
> 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'   WITH SERDEPROPERTIES ( 
>  'timestamp.formats'='MMddHHmmss') stored as textfile;
> cat sampledata 
> 2020120517
> hdfs dfs -put sampledata /warehouse/tablespace/external/hive/tstable
> {code}
> Disable fetch task conversion and run select * from tstable, which produces no 
> results; disabling vectorized deserialization via set 
> hive.vectorized.use.vector.serde.deserialize=false; returns the expected 
> output.
> While parsing the string to a timestamp, 
> https://github.com/apache/hive/blob/master/serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleDeserializeRead.java#L812
>  does not set the DateTimeFormatter, which results in an IllegalArgumentException 
> while parsing the timestamp through TimestampUtils.stringToTimestamp(strValue)



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Commented] (HIVE-24523) Vectorized read path for LazySimpleSerde does not honor the SERDEPROPERTIES for timestamp

2020-12-26 Thread Naresh P R (Jira)


[ 
https://issues.apache.org/jira/browse/HIVE-24523?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=17255161#comment-17255161
 ] 

Naresh P R commented on HIVE-24523:
---

[~Rajkumar Singh] If you are not working on this, can I work on it?

> Vectorized read path for LazySimpleSerde does not honor the SERDEPROPERTIES 
> for timestamp
> -
>
> Key: HIVE-24523
> URL: https://issues.apache.org/jira/browse/HIVE-24523
> Project: Hive
>  Issue Type: Bug
>  Components: Vectorization
>Affects Versions: 3.2.0, 4.0.0
>Reporter: Rajkumar Singh
>Priority: Major
>
> Steps to repro:
> {code:java}
>   create external  table tstable(date_created timestamp)   ROW FORMAT SERDE 
> 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'   WITH SERDEPROPERTIES ( 
>  'timestamp.formats'='MMddHHmmss') stored as textfile;
> cat sampledata 
> 2020120517
> hdfs dfs -put sampledata /warehouse/tablespace/external/hive/tstable
> {code}
> Disable fetch task conversion and run select * from tstable, which produces no 
> results; disabling vectorized deserialization via set 
> hive.vectorized.use.vector.serde.deserialize=false; returns the expected 
> output.
> While parsing the string to a timestamp, 
> https://github.com/apache/hive/blob/master/serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleDeserializeRead.java#L812
>  does not set the DateTimeFormatter, which results in an IllegalArgumentException 
> while parsing the timestamp through TimestampUtils.stringToTimestamp(strValue)



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Resolved] (HIVE-24305) avro decimal schema is not properly populating scale/precision if value is enclosed in quote

2020-12-24 Thread Naresh P R (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-24305?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Naresh P R resolved HIVE-24305.
---
Fix Version/s: 4.0.0
   Resolution: Fixed

> avro decimal schema is not properly populating scale/precision if value is 
> enclosed in quote
> 
>
> Key: HIVE-24305
> URL: https://issues.apache.org/jira/browse/HIVE-24305
> Project: Hive
>  Issue Type: Bug
>Reporter: Naresh P R
>Assignee: Naresh P R
>Priority: Major
>  Labels: pull-request-available
> Fix For: 4.0.0
>
>  Time Spent: 40m
>  Remaining Estimate: 0h
>
> {code:java}
> CREATE TABLE test_quoted_scale_precision STORED AS AVRO TBLPROPERTIES 
> ('avro.schema.literal'='{"type":"record","name":"DecimalTest","namespace":"com.example.test","fields":[{"name":"Decimal24_6","type":["null",{"type":"bytes","logicalType":"decimal","precision":24,"scale":"6"}]}]}');
>  
> desc test_quoted_scale_precision;
> // current output
> decimal24_6 decimal(24,0)
> // expected output
> decimal24_6 decimal(24,6){code}
>  



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Commented] (HIVE-24305) avro decimal schema is not properly populating scale/precision if value is enclosed in quote

2020-12-24 Thread Naresh P R (Jira)


[ 
https://issues.apache.org/jira/browse/HIVE-24305?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=17254737#comment-17254737
 ] 

Naresh P R commented on HIVE-24305:
---

Thanks for the review & commit [~lpinter]. 

 

> avro decimal schema is not properly populating scale/precision if value is 
> enclosed in quote
> 
>
> Key: HIVE-24305
> URL: https://issues.apache.org/jira/browse/HIVE-24305
> Project: Hive
>  Issue Type: Bug
>Reporter: Naresh P R
>Assignee: Naresh P R
>Priority: Major
>  Labels: pull-request-available
>  Time Spent: 40m
>  Remaining Estimate: 0h
>
> {code:java}
> CREATE TABLE test_quoted_scale_precision STORED AS AVRO TBLPROPERTIES 
> ('avro.schema.literal'='{"type":"record","name":"DecimalTest","namespace":"com.example.test","fields":[{"name":"Decimal24_6","type":["null",{"type":"bytes","logicalType":"decimal","precision":24,"scale":"6"}]}]}');
>  
> desc test_quoted_scale_precision;
> // current output
> decimal24_6 decimal(24,0)
> // expected output
> decimal24_6 decimal(24,6){code}
>  



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Updated] (HIVE-24433) AutoCompaction is not getting triggered for CamelCase Partition Values

2020-11-26 Thread Naresh P R (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-24433?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Naresh P R updated HIVE-24433:
--
Description: 
PartitionKeyValue is getting converted to lowercase in the 2 places below.

[https://github.com/apache/hive/blob/master/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java#L2728]

[https://github.com/apache/hive/blob/master/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java#L2851]

Because of this, the TXN_COMPONENTS & HIVE_LOCKS tables do not have entries with 
the proper partition values.

When the query completes, the entry moves from TXN_COMPONENTS to 
COMPLETED_TXN_COMPONENTS. Hive AutoCompaction does not recognize the partition 
& considers it an invalid partition.
{code:java}
create table abc(name string) partitioned by(city string) stored as orc 
tblproperties('transactional'='true');
insert into abc partition(city='Bangalore') values('aaa');
{code}
Example entry in COMPLETED_TXN_COMPONENTS
{noformat}
+---+--++---+-+-+---+
| CTC_TXNID | CTC_DATABASE | CTC_TABLE          | CTC_PARTITION     | 
CTC_TIMESTAMP       | CTC_WRITEID | CTC_UPDATE_DELETE |
+---+--++---+-+-+---+
|         2 | default      | abc    | city=bangalore    | 2020-11-25 09:26:59 | 
          1 | N                 |
+---+--++---+-+-+---+
{noformat}
 

AutoCompaction fails to get triggered with below error
{code:java}
2020-11-25T09:35:10,364 INFO [Thread-9]: compactor.Initiator 
(Initiator.java:run(98)) - Checking to see if we should compact 
default.abc.city=bangalore
2020-11-25T09:35:10,380 INFO [Thread-9]: compactor.Initiator 
(Initiator.java:run(155)) - Can't find partition 
default.compaction_test.city=bangalore, assuming it has been dropped and moving 
on{code}
I verified the 4 SQL statements below with my PR; all of them produced the correct PartitionKeyValue

i.e, COMPLETED_TXN_COMPONENTS.CTC_PARTITION="city=Bangalore"
{code:java}
insert into table abc PARTITION(CitY='Bangalore') values('Dan');
insert overwrite table abc partition(CiTy='Bangalore') select Name from abc;
update table abc set Name='xy' where CiTy='Bangalore';
delete from abc where CiTy='Bangalore';{code}

  was:
PartionKeyValue is getting converted into lowerCase in below 2 places.

[https://github.com/apache/hive/blob/master/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java#L2728]

[https://github.com/apache/hive/blob/master/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java#L2851]

Because of which TXN_COMPONENTS & HIVE_LOCKS tables are not having entries from 
proper partition values.

When query completes, the entry moves from TXN_COMPONENTS to 
COMPLETED_TXN_COMPONENTS. Hive AutoCompaction will not recognize the partition 
& considers it as invalid partition
{code:java}
create table abc(name string) partitioned by(city string) stored as orc 
tblproperties('transactional'='true');
insert into abc partition(city='Bangalore') values('aaa');
{code}
Example entry in COMPLETED_TXN_COMPONENTS
{noformat}
+---+--++---+-+-+---+
| CTC_TXNID | CTC_DATABASE | CTC_TABLE          | CTC_PARTITION     | 
CTC_TIMESTAMP       | CTC_WRITEID | CTC_UPDATE_DELETE |
+---+--++---+-+-+---+
|         2 | default      | abc    | city=bangalore    | 2020-11-25 09:26:59 | 
          1 | N                 |
+---+--++---+-+-+---+
{noformat}
 

AutoCompaction fails to get triggered with below error
{code:java}
2020-11-25T09:35:10,364 INFO [Thread-9]: compactor.Initiator 
(Initiator.java:run(98)) - Checking to see if we should compact 
default.abc.city=bangalore
2020-11-25T09:35:10,380 INFO [Thread-9]: compactor.Initiator 
(Initiator.java:run(155)) - Can't find partition 
default.compaction_test.city=bhubaneshwar, assuming it has been dropped and 
moving on{code}
I verifed below 4 SQL's with my PR, those all produced correct PartitionKeyValue

i.e, COMPLETED_TXN_COMPONENTS.CTC_PARTITION="city=Bangalore"
{code:java}
insert into table abc PARTITION(CitY='Bangalore') values('Dan');
insert overwrite table abc partition(CiTy='Bangalore') select Name from abc;
update table abc set Name='xy' where CiTy='Bangalore';
delete from abc where CiTy='Bangalore';{code}


> AutoCompaction is not getting 

[jira] [Updated] (HIVE-24433) AutoCompaction is not getting triggered for CamelCase Partition Values

2020-11-25 Thread Naresh P R (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-24433?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Naresh P R updated HIVE-24433:
--
Description: 
PartitionKeyValue is getting converted to lowercase in the 2 places below.

[https://github.com/apache/hive/blob/master/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java#L2728]

[https://github.com/apache/hive/blob/master/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java#L2851]

Because of which TXN_COMPONENTS & HIVE_LOCKS tables are not having entries from 
proper partition values.

When query completes, the entry moves from TXN_COMPONENTS to 
COMPLETED_TXN_COMPONENTS. Hive AutoCompaction will not recognize the partition 
& considers it as invalid partition
{code:java}
create table abc(name string) partitioned by(city string) stored as orc 
tblproperties('transactional'='true');
insert into abc partition(city='Bangalore') values('aaa');
{code}
Example entry in COMPLETED_TXN_COMPONENTS
{noformat}
+---+--++---+-+-+---+
| CTC_TXNID | CTC_DATABASE | CTC_TABLE          | CTC_PARTITION     | 
CTC_TIMESTAMP       | CTC_WRITEID | CTC_UPDATE_DELETE |
+---+--++---+-+-+---+
|         2 | default      | abc    | city=bangalore    | 2020-11-25 09:26:59 | 
          1 | N                 |
+---+--++---+-+-+---+
{noformat}
 

AutoCompaction fails to get triggered with below error
{code:java}
2020-11-25T09:35:10,364 INFO [Thread-9]: compactor.Initiator 
(Initiator.java:run(98)) - Checking to see if we should compact 
default.abc.city=bangalore
2020-11-25T09:35:10,380 INFO [Thread-9]: compactor.Initiator 
(Initiator.java:run(155)) - Can't find partition 
default.compaction_test.city=bhubaneshwar, assuming it has been dropped and 
moving on{code}
I verified the 4 SQL statements below with my PR; all of them produced the correct PartitionKeyValue

i.e, COMPLETED_TXN_COMPONENTS.CTC_PARTITION="city=Bangalore"
{code:java}
insert into table abc PARTITION(CitY='Bangalore') values('Dan');
insert overwrite table abc partition(CiTy='Bangalore') select Name from abc;
update table abc set Name='xy' where CiTy='Bangalore';
delete from abc where CiTy='Bangalore';{code}

  was:
PartionKeyValue is getting converted into lowerCase in below 2 places.

[https://github.com/apache/hive/blob/master/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java#L2728]

[https://github.com/apache/hive/blob/master/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java#L2851]

Because of which TXN_COMPONENTS & HIVE_LOCKS tables are not having entries from 
proper partition values.

When query completes, the entry moves from TXN_COMPONENTS to 
COMPLETED_TXN_COMPONENTS. Hive AutoCompaction will not recognize the partition 
& considers it as invalid partition
{code:java}
create table abc(name string) partitioned by(city string) stored as orc 
tblproperties('transactional'='true');
insert into abc partition(city='Bangalore') values('aaa');
{code}
Example entry in COMPLETED_TXN_COMPONENTS
{noformat}
+---+--++---+-+-+---+
| CTC_TXNID | CTC_DATABASE | CTC_TABLE          | CTC_PARTITION     | 
CTC_TIMESTAMP       | CTC_WRITEID | CTC_UPDATE_DELETE |
+---+--++---+-+-+---+
|         2 | default      | abc    | city=bangalore    | 2020-11-25 09:26:59 | 
          1 | N                 |
+---+--++---+-+-+---+
{noformat}
 

AutoCompaction fails to get triggered with below error
{code:java}
2020-11-25T09:35:10,364 INFO [Thread-9]: compactor.Initiator 
(Initiator.java:run(98)) - Checking to see if we should compact 
default.abc.city=bangalore
2020-11-25T09:35:10,380 INFO [Thread-9]: compactor.Initiator 
(Initiator.java:run(155)) - Can't find partition 
default.compaction_test.city=bhubaneshwar, assuming it has been dropped and 
moving on{code}


> AutoCompaction is not getting triggered for CamelCase Partition Values
> --
>
> Key: HIVE-24433
> URL: https://issues.apache.org/jira/browse/HIVE-24433
> Project: Hive
>  Issue Type: Bug
>Reporter: Naresh P R
>Assignee: Naresh P R
>Priority: Major
>  Labels: 

[jira] [Updated] (HIVE-24433) AutoCompaction is not getting triggered for CamelCase Partition Values

2020-11-25 Thread Naresh P R (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-24433?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Naresh P R updated HIVE-24433:
--
Description: 
PartionKeyValue is getting converted into lowerCase in below 2 places.

[https://github.com/apache/hive/blob/master/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java#L2728]

[https://github.com/apache/hive/blob/master/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java#L2851]

Because of which TXN_COMPONENTS & HIVE_LOCKS tables are not having entries from 
proper partition values.

When query completes, the entry moves from TXN_COMPONENTS to 
COMPLETED_TXN_COMPONENTS. Hive AutoCompaction will not recognize the partition 
& considers it as invalid partition
{code:java}
create table abc(name string) partitioned by(city string) stored as orc 
tblproperties('transactional'='true');
insert into abc partition(city='Bangalore') values('aaa');
{code}
Example entry in COMPLETED_TXN_COMPONENTS
{noformat}
+---+--++---+-+-+---+
| CTC_TXNID | CTC_DATABASE | CTC_TABLE          | CTC_PARTITION     | 
CTC_TIMESTAMP       | CTC_WRITEID | CTC_UPDATE_DELETE |
+---+--++---+-+-+---+
|         2 | default      | abc    | city=bangalore    | 2020-11-25 09:26:59 | 
          1 | N                 |
+---+--++---+-+-+---+
{noformat}
 

AutoCompaction fails to get triggered with below error
{code:java}
2020-11-25T09:35:10,364 INFO [Thread-9]: compactor.Initiator 
(Initiator.java:run(98)) - Checking to see if we should compact 
default.abc.city=bangalore
2020-11-25T09:35:10,380 INFO [Thread-9]: compactor.Initiator 
(Initiator.java:run(155)) - Can't find partition 
default.compaction_test.city=bhubaneshwar, assuming it has been dropped and 
moving on{code}

  was:
PartionKeyValue is getting converted into lowerCase in below 2 places.

[https://github.com/apache/hive/blob/master/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java#L2728]

[https://github.com/apache/hive/blob/master/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java#L2851]

Because of which TXN_COMPONENTS & HIVE_LOCKS tables are not having entries from 
proper partition values.

When query completes, the entry moves from TXN_COMPONENTS to 
COMPLETED_TXN_COMPONENTS. Hive AutoCompaction will not recognize the partition 
& considers it as invalid partition

 
{code:java}
create table abc(name string) partitioned by(city string) stored as orc 
tblproperties('transactional'='true');
insert into abc partition(city='Bangalore') values('aaa');
{code}
 

Example entry in COMPLETED_TXN_COMPONENTS

 
{noformat}
+---+--++---+-+-+---+
| CTC_TXNID | CTC_DATABASE | CTC_TABLE          | CTC_PARTITION     | 
CTC_TIMESTAMP       | CTC_WRITEID | CTC_UPDATE_DELETE |
+---+--++---+-+-+---+
|         2 | default      | abc    | city=bangalore    | 2020-11-25 09:26:59 | 
          1 | N                 |
+---+--++---+-+-+---+
{noformat}
 

AutoCompaction fails to get triggered with below error
{code:java}
2020-11-25T09:35:10,364 INFO [Thread-9]: compactor.Initiator 
(Initiator.java:run(98)) - Checking to see if we should compact 
default.abc.city=bangalore
2020-11-25T09:35:10,380 INFO [Thread-9]: compactor.Initiator 
(Initiator.java:run(155)) - Can't find partition 
default.compaction_test.city=bhubaneshwar, assuming it has been dropped and 
moving on{code}


> AutoCompaction is not getting triggered for CamelCase Partition Values
> --
>
> Key: HIVE-24433
> URL: https://issues.apache.org/jira/browse/HIVE-24433
> Project: Hive
>  Issue Type: Bug
>Reporter: Naresh P R
>Assignee: Naresh P R
>Priority: Major
>
> PartionKeyValue is getting converted into lowerCase in below 2 places.
> [https://github.com/apache/hive/blob/master/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java#L2728]
> [https://github.com/apache/hive/blob/master/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java#L2851]
> Because of which 

[jira] [Updated] (HIVE-24433) AutoCompaction is not getting triggered for CamelCase Partition Values

2020-11-25 Thread Naresh P R (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-24433?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Naresh P R updated HIVE-24433:
--
Description: 
PartionKeyValue is getting converted into lowerCase in below 2 places.

[https://github.com/apache/hive/blob/master/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java#L2728]

[https://github.com/apache/hive/blob/master/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java#L2851]

Because of which TXN_COMPONENTS & HIVE_LOCKS tables are not having entries from 
proper partition values.

When query completes, the entry moves from TXN_COMPONENTS to 
COMPLETED_TXN_COMPONENTS. Hive AutoCompaction will not recognize the partition 
& considers it as invalid partition

 
{code:java}
create table abc(name string) partitioned by(city string) stored as orc 
tblproperties('transactional'='true');
insert into abc partition(city='Bangalore') values('aaa');
{code}
 

Example entry in COMPLETED_TXN_COMPONENTS

 
{noformat}
+---+--++---+-+-+---+
| CTC_TXNID | CTC_DATABASE | CTC_TABLE          | CTC_PARTITION     | 
CTC_TIMESTAMP       | CTC_WRITEID | CTC_UPDATE_DELETE |
+---+--++---+-+-+---+
|         2 | default      | abc    | city=bangalore    | 2020-11-25 09:26:59 | 
          1 | N                 |
+---+--++---+-+-+---+
{noformat}
 

AutoCompaction fails to get triggered with below error
{code:java}
2020-11-25T09:35:10,364 INFO [Thread-9]: compactor.Initiator 
(Initiator.java:run(98)) - Checking to see if we should compact 
default.abc.city=bangalore
2020-11-25T09:35:10,380 INFO [Thread-9]: compactor.Initiator 
(Initiator.java:run(155)) - Can't find partition 
default.compaction_test.city=bhubaneshwar, assuming it has been dropped and 
moving on{code}

  was:
PartionKeyValue is getting converted into lowerCase in below 2 places.

[https://github.com/apache/hive/blob/master/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java#L2728]

[https://github.com/apache/hive/blob/master/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java#L2851]

Because of which TXN_COMPONENTS & HIVE_LOCKS tables are not having entries from 
proper partition values.

When query completes, the entry moves from TXN_COMPONENTS to 
COMPLETED_TXN_COMPONENTS. Hive AutoCompaction will not recognize the partition 
& considers it as invalid partition

create table abc(name string) partitioned by(city string) stored as orc 
tblproperties('transactional'='true');

insert into abc partition(city='Bangalore') values('aaa');

Example entry in COMPLETED_TXN_COMPONENTS

 
{noformat}
+---+--++---+-+-+---+
| CTC_TXNID | CTC_DATABASE | CTC_TABLE          | CTC_PARTITION     | 
CTC_TIMESTAMP       | CTC_WRITEID | CTC_UPDATE_DELETE |
+---+--++---+-+-+---+
|         2 | default      | abc    | city=bangalore    | 2020-11-25 09:26:59 | 
          1 | N                 |
+---+--++---+-+-+---+
{noformat}
 

AutoCompaction fails to get triggered with below error
{code:java}
2020-11-25T09:35:10,364 INFO [Thread-9]: compactor.Initiator 
(Initiator.java:run(98)) - Checking to see if we should compact 
default.abc.city=bangalore
 2020-11-25T09:35:10,380 INFO [Thread-9]: compactor.Initiator 
(Initiator.java:run(155)) - Can't find partition 
default.compaction_test.city=bhubaneshwar, assuming it has been dropped and 
moving on{code}


> AutoCompaction is not getting triggered for CamelCase Partition Values
> --
>
> Key: HIVE-24433
> URL: https://issues.apache.org/jira/browse/HIVE-24433
> Project: Hive
>  Issue Type: Bug
>Reporter: Naresh P R
>Assignee: Naresh P R
>Priority: Major
>
> PartionKeyValue is getting converted into lowerCase in below 2 places.
> [https://github.com/apache/hive/blob/master/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java#L2728]
> [https://github.com/apache/hive/blob/master/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java#L2851]
> Because of which TXN_COMPONENTS & 

[jira] [Updated] (HIVE-24433) AutoCompaction is not getting triggered for CamelCase Partition Values

2020-11-25 Thread Naresh P R (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-24433?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Naresh P R updated HIVE-24433:
--
Description: 
PartionKeyValue is getting converted into lowerCase in below 2 places.

[https://github.com/apache/hive/blob/master/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java#L2728]

[https://github.com/apache/hive/blob/master/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java#L2851]

Because of which TXN_COMPONENTS & HIVE_LOCKS tables are not having entries from 
proper partition values.

When query completes, the entry moves from TXN_COMPONENTS to 
COMPLETED_TXN_COMPONENTS. Hive AutoCompaction will not recognize the partition 
& considers it as invalid partition

create table abc(name string) partitioned by(city string) stored as orc 
tblproperties('transactional'='true');

insert into abc partition(city='Bangalore') values('aaa');

Example entry in COMPLETED_TXN_COMPONENTS

 
{noformat}
+---+--++---+-+-+---+
| CTC_TXNID | CTC_DATABASE | CTC_TABLE          | CTC_PARTITION     | 
CTC_TIMESTAMP       | CTC_WRITEID | CTC_UPDATE_DELETE |
+---+--++---+-+-+---+
|         2 | default      | abc    | city=bangalore    | 2020-11-25 09:26:59 | 
          1 | N                 |
+---+--++---+-+-+---+
{noformat}
 

AutoCompaction fails to get triggered with below error
{code:java}
2020-11-25T09:35:10,364 INFO [Thread-9]: compactor.Initiator 
(Initiator.java:run(98)) - Checking to see if we should compact 
default.abc.city=bangalore
 2020-11-25T09:35:10,380 INFO [Thread-9]: compactor.Initiator 
(Initiator.java:run(155)) - Can't find partition 
default.compaction_test.city=bhubaneshwar, assuming it has been dropped and 
moving on{code}

  was:
partionKey=paritionValue is getting converted into lowerCase in below 2 places.

[https://github.com/apache/hive/blob/master/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java#L2728]

https://github.com/apache/hive/blob/master/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java#L2851

Because of which TXN_COMPONENTS & HIVE_LOCKS tables are not having entries from 
proper partition values.

When query completes, the entry moves from TXN_COMPONENTS to 
COMPLETED_TXN_COMPONENTS. Hive AutoCompaction will not recognize the partition 
& considers it as invalid partition

create table abc(name string) partitioned by(city string) stored as orc 
tblproperties('transactional'='true');

insert into abc partition(city='Bangalore') values('aaa');

Example entry in COMPLETED_TXN_COMPONENTS

 
{noformat}
+---+--++---+-+-+---+
| CTC_TXNID | CTC_DATABASE | CTC_TABLE          | CTC_PARTITION     | 
CTC_TIMESTAMP       | CTC_WRITEID | CTC_UPDATE_DELETE |
+---+--++---+-+-+---+
|         2 | default      | abc    | city=bangalore    | 2020-11-25 09:26:59 | 
          1 | N                 |
+---+--++---+-+-+---+
{noformat}
 

AutoCompaction fails to get triggered with below error
2020-11-25T09:35:10,364 INFO  [Thread-9]: compactor.Initiator 
(Initiator.java:run(98)) - Checking to see if we should compact 
default.abc.city=bangalore
2020-11-25T09:35:10,380 INFO  [Thread-9]: compactor.Initiator 
(Initiator.java:run(155)) - Can't find partition 
default.compaction_test.city=bhubaneshwar, assuming it has been dropped and 
moving on


> AutoCompaction is not getting triggered for CamelCase Partition Values
> --
>
> Key: HIVE-24433
> URL: https://issues.apache.org/jira/browse/HIVE-24433
> Project: Hive
>  Issue Type: Bug
>Reporter: Naresh P R
>Assignee: Naresh P R
>Priority: Major
>
> PartionKeyValue is getting converted into lowerCase in below 2 places.
> [https://github.com/apache/hive/blob/master/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java#L2728]
> [https://github.com/apache/hive/blob/master/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java#L2851]
> Because of which TXN_COMPONENTS & HIVE_LOCKS tables are not having 

[jira] [Assigned] (HIVE-24433) AutoCompaction is not getting triggered for CamelCase Partition Values

2020-11-25 Thread Naresh P R (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-24433?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Naresh P R reassigned HIVE-24433:
-


> AutoCompaction is not getting triggered for CamelCase Partition Values
> --
>
> Key: HIVE-24433
> URL: https://issues.apache.org/jira/browse/HIVE-24433
> Project: Hive
>  Issue Type: Bug
>Reporter: Naresh P R
>Assignee: Naresh P R
>Priority: Major
>
> partionKey=paritionValue is getting converted into lowerCase in below 2 
> places.
> [https://github.com/apache/hive/blob/master/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java#L2728]
> https://github.com/apache/hive/blob/master/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java#L2851
> Because of which TXN_COMPONENTS & HIVE_LOCKS tables are not having entries 
> from proper partition values.
> When query completes, the entry moves from TXN_COMPONENTS to 
> COMPLETED_TXN_COMPONENTS. Hive AutoCompaction will not recognize the 
> partition & considers it as invalid partition
> create table abc(name string) partitioned by(city string) stored as orc 
> tblproperties('transactional'='true');
> insert into abc partition(city='Bangalore') values('aaa');
> Example entry in COMPLETED_TXN_COMPONENTS
>  
> {noformat}
> +---+--++---+-+-+---+
> | CTC_TXNID | CTC_DATABASE | CTC_TABLE          | CTC_PARTITION     | 
> CTC_TIMESTAMP       | CTC_WRITEID | CTC_UPDATE_DELETE |
> +---+--++---+-+-+---+
> |         2 | default      | abc    | city=bangalore    | 2020-11-25 09:26:59 
> |           1 | N                 |
> +---+--++---+-+-+---+
> {noformat}
>  
> AutoCompaction fails to get triggered with below error
> 2020-11-25T09:35:10,364 INFO  [Thread-9]: compactor.Initiator 
> (Initiator.java:run(98)) - Checking to see if we should compact 
> default.abc.city=bangalore
> 2020-11-25T09:35:10,380 INFO  [Thread-9]: compactor.Initiator 
> (Initiator.java:run(155)) - Can't find partition 
> default.compaction_test.city=bhubaneshwar, assuming it has been dropped and 
> moving on



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Updated] (HIVE-24381) compressed text input returns 0 rows if skip header/footer is mentioned.

2020-11-12 Thread Naresh P R (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-24381?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Naresh P R updated HIVE-24381:
--
Attachment: test.q

> compressed text input returns 0 rows if skip header/footer is mentioned.
> 
>
> Key: HIVE-24381
> URL: https://issues.apache.org/jira/browse/HIVE-24381
> Project: Hive
>  Issue Type: Bug
>Reporter: Naresh P R
>Assignee: Naresh P R
>Priority: Major
> Attachments: test.q
>
>
> The attached q file returns 0 rows with hive.fetch.task.conversion=none; the 
> correct result is 2 rows.



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Assigned] (HIVE-24381) compressed text input returns 0 rows if skip header/footer is mentioned.

2020-11-12 Thread Naresh P R (Jira)


 [ 
https://issues.apache.org/jira/browse/HIVE-24381?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Naresh P R reassigned HIVE-24381:
-


> compressed text input returns 0 rows if skip header/footer is mentioned.
> 
>
> Key: HIVE-24381
> URL: https://issues.apache.org/jira/browse/HIVE-24381
> Project: Hive
>  Issue Type: Bug
>Reporter: Naresh P R
>Assignee: Naresh P R
>Priority: Major
>
> The attached q file returns 0 rows with hive.fetch.task.conversion=none; the 
> correct result is 2 rows.



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


[jira] [Comment Edited] (HIVE-24294) TezSessionPool sessions can throw AssertionError

2020-10-29 Thread Naresh P R (Jira)


[ 
https://issues.apache.org/jira/browse/HIVE-24294?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=17223178#comment-17223178
 ] 

Naresh P R edited comment on HIVE-24294 at 10/29/20, 7:48 PM:
--

Thanks for the review & commit [~lpinter] & [~mustafaiman]


was (Author: nareshpr):
Thanks for the review & commit [~lpinter]

> TezSessionPool sessions can throw AssertionError
> 
>
> Key: HIVE-24294
> URL: https://issues.apache.org/jira/browse/HIVE-24294
> Project: Hive
>  Issue Type: Bug
>Reporter: Naresh P R
>Assignee: Naresh P R
>Priority: Major
>  Labels: pull-request-available
> Fix For: 4.0.0
>
>  Time Spent: 40m
>  Remaining Estimate: 0h
>
> Whenever default TezSessionPool sessions are reopened for some reason, we are 
> setting dagResources to null before close & setting it back in open.
> https://github.com/apache/hive/blob/master/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionPoolManager.java#L498-L503
> If there is an exception in sessionState.close(), we are not restoring the 
> dagResources but moving the session back to TezSessionPool. E.g., exception 
> trace when sessionState.close() failed:
> {code:java}
> 2020-10-15T09:20:28,749 INFO  [HiveServer2-Background-Pool: Thread-25451]: 
> client.TezClient (:()) - Failed to shutdown Tez Session via proxy
> org.apache.tez.dag.api.SessionNotRunning: Application not running, 
> applicationId=application_1602093123456_12345, yarnApplicationState=FINISHED, 
> finalApplicationStatus=SUCCEEDED, 
> trackingUrl=http://localhost:8088/proxy/application_1602093123456_12345/, 
> diagnostics=Session timed out, lastDAGCompletionTime=1602997683786 ms, 
> sessionTimeoutInterval=60 ms
> Session stats:submittedDAGs=2, successfulDAGs=2, failedDAGs=0, killedDAGs=0   
>  at 
> org.apache.tez.client.TezClientUtils.getAMProxy(TezClientUtils.java:910) 
> at org.apache.tez.client.TezClient.getAMProxy(TezClient.java:1060) 
> at org.apache.tez.client.TezClient.stop(TezClient.java:743) 
> at 
> org.apache.hadoop.hive.ql.exec.tez.TezSessionState.closeClient(TezSessionState.java:789)
>  
> at 
> org.apache.hadoop.hive.ql.exec.tez.TezSessionState.close(TezSessionState.java:756)
>  
> at 
> org.apache.hadoop.hive.ql.exec.tez.TezSessionPoolSession.close(TezSessionPoolSession.java:111)
>  
> at 
> org.apache.hadoop.hive.ql.exec.tez.TezSessionPoolManager.reopenInternal(TezSessionPoolManager.java:496)
>  
> at 
> org.apache.hadoop.hive.ql.exec.tez.TezSessionPoolManager.reopen(TezSessionPoolManager.java:487)
>  
> at 
> org.apache.hadoop.hive.ql.exec.tez.TezSessionPoolSession.reopen(TezSessionPoolSession.java:228)
>  
> at 
> org.apache.hadoop.hive.ql.exec.tez.TezTask.getNewTezSessionOnError(TezTask.java:531)
>  
> at 
> org.apache.hadoop.hive.ql.exec.tez.TezTask.submit(TezTask.java:546) 
> at 
> org.apache.hadoop.hive.ql.exec.tez.TezTask.execute(TezTask.java:221){code}
> Because of this, all new queries using these corrupted sessions are failing 
> with the below exception:
> {code:java}
> Caused by: java.lang.AssertionError: Ensure called on an unitialized (or 
> closed) session 41774265-b7da-4d58-84a8-1bedfd597aec
> at 
> org.apache.hadoop.hive.ql.exec.tez.TezSessionState.ensureLocalResources(TezSessionState.java:685){code}



--
This message was sent by Atlassian Jira
(v8.3.4#803005)


  1   2   3   4   >