[jira] [Updated] (HIVE-26505) Case When Some result data is lost when there are common column conditions and partitioned column conditions
[ https://issues.apache.org/jira/browse/HIVE-26505?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] GuangMing Lu updated HIVE-26505: Description: {code:java} create table test0831 (id string) partitioned by (cp string); insert into test0831 values ('a', '2022-08-23'),('c', '2022-08-23'),('d', '2022-08-23'); insert into test0831 values ('a', '2022-08-24'),('b', '2022-08-24'); select * from test0831; +-+--+ | test0831.id | test0831.cp | +-+--+ | a | 2022-08-23 | | b | 2022-08-23 | | a | 2022-08-23 | | c | 2022-08-24 | | d | 2022-08-24 | +-+--+ select * from test0831 where (case when id='a' and cp='2022-08-23' then 1 else 0 end)=0; +--+--+ | test0830.id | test0830.cp | +--+--+ | a | 2022-08-24 | | b | 2022-08-24 | +--+--+ {code} was: {code:java} create table test0831 (id string) partitioned by (cp string); insert into test0831 values ('a', '2022-08-23'),('c', '2022-08-23'),('d', '2022-08-23'); insert into test0831 values ('a', '2022-08-24'),('b', '2022-08-24'); select * from test0831; +-+--+ | test0831.id | test0831.cp | +-+--+ | a | 2022-08-23 | | b | 2022-08-23 | | a | 2022-08-23 | | c | 2022-08-24 | | d | 2022-08-24 | +-+--+ select * from test0831 where (case when id='a' and cp='2022-08-24' then 1 else 0 end)=0; +--+--+ | test0830.id | test0830.cp | +--+--+ | a | 2022-08-23 | | c | 2022-08-23 | | d | 2022-08-23 | +--+--+{code} > Case When Some result data is lost when there are common column conditions > and partitioned column conditions > - > > Key: HIVE-26505 > URL: https://issues.apache.org/jira/browse/HIVE-26505 > Project: Hive > Issue Type: Bug > Components: Hive >Affects Versions: 3.1.0, 4.0.0-alpha-1 >Reporter: GuangMing Lu >Priority: Critical > > {code:java} > create table test0831 (id string) partitioned by (cp string); > insert into test0831 values ('a', '2022-08-23'),('c', '2022-08-23'),('d', > '2022-08-23'); > insert into test0831 values ('a', '2022-08-24'),('b', '2022-08-24'); > select * from test0831; > +-+--+ > | test0831.id | 
test0831.cp | > +-+--+ > | a | 2022-08-23 | > | b | 2022-08-23 | > | a | 2022-08-23 | > | c | 2022-08-24 | > | d | 2022-08-24 | > +-+--+ > select * from test0831 where (case when id='a' and cp='2022-08-23' then 1 > else 0 end)=0; > +--+--+ > | test0830.id | test0830.cp | > +--+--+ > | a | 2022-08-24 | > | b | 2022-08-24 | > +--+--+ > {code} > -- This message was sent by Atlassian Jira (v8.20.10#820010)
[jira] [Updated] (HIVE-26505) Case When Some result data is lost when there are common column conditions and partitioned column conditions
[ https://issues.apache.org/jira/browse/HIVE-26505?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] GuangMing Lu updated HIVE-26505: Description: {code:java} create table test0831 (id string) partitioned by (cp string); insert into test0831 values ('a', '2022-08-23'),('c', '2022-08-23'),('d', '2022-08-23'); insert into test0831 values ('a', '2022-08-24'),('b', '2022-08-24'); select * from test0831; +-+--+ | test0831.id | test0831.cp | +-+--+ | a | 2022-08-23 | | b | 2022-08-23 | | a | 2022-08-23 | | c | 2022-08-24 | | d | 2022-08-24 | +-+--+ select * from test0831 where (case when id='a' and cp='2022-08-24' then 1 else 0 end)=0; +--+--+ | test0830.id | test0830.cp | +--+--+ | a | 2022-08-23 | | c | 2022-08-23 | | d | 2022-08-23 | +--+--+{code} was: {code:java} create table test0831 (id string) partitioned by (cp string); insert into test0831 values ('a', '2022-08-23'),('c', '2022-08-24'),('d', '2022-08-244'); insert into test0831 values ('a', '2022-08-24'),('b', '2022-08-24'); select * from test0831; +-+--+ | test0831.id | test0831.cp | +-+--+ | a | 2022-08-23 | | b | 2022-08-23 | | a | 2022-08-23 | | c | 2022-08-24 | | d | 2022-08-24 | +-+--+ select * from test0831 where (case when id='a' and cp='2022-08-24' then 1 else 0 end)=0; +--+--+ | test0830.id | test0830.cp | +--+--+ | a | 2022-08-23 | | c | 2022-08-23 | | d | 2022-08-23 | +--+--+{code} > Case When Some result data is lost when there are common column conditions > and partitioned column conditions > - > > Key: HIVE-26505 > URL: https://issues.apache.org/jira/browse/HIVE-26505 > Project: Hive > Issue Type: Bug > Components: Hive >Affects Versions: 3.1.0, 4.0.0-alpha-1 >Reporter: GuangMing Lu >Priority: Critical > > {code:java} > create table test0831 (id string) partitioned by (cp string); > insert into test0831 values ('a', '2022-08-23'),('c', '2022-08-23'),('d', > '2022-08-23'); > insert into test0831 values ('a', '2022-08-24'),('b', '2022-08-24'); > select * from test0831; > +-+--+ > | 
test0831.id | test0831.cp | > +-+--+ > | a | 2022-08-23 | > | b | 2022-08-23 | > | a | 2022-08-23 | > | c | 2022-08-24 | > | d | 2022-08-24 | > +-+--+ > select * from test0831 where (case when id='a' and cp='2022-08-24' then 1 > else 0 end)=0; > +--+--+ > | test0830.id | test0830.cp | > +--+--+ > | a | 2022-08-23 | > | c | 2022-08-23 | > | d | 2022-08-23 | > +--+--+{code} > -- This message was sent by Atlassian Jira (v8.20.10#820010)
[jira] [Updated] (HIVE-26505) Case When Some result data is lost when there are common column conditions and partitioned column conditions
[ https://issues.apache.org/jira/browse/HIVE-26505?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] GuangMing Lu updated HIVE-26505: Description: {code:java} create table test0831 (id string) partitioned by (cp string); insert into test0831 values ('a', '2022-08-23'),('c', '2022-08-24'),('d', '2022-08-244'); insert into test0831 values ('a', '2022-08-24'),('b', '2022-08-24'); select * from test0831; +-+--+ | test0831.id | test0831.cp | +-+--+ | a | 2022-08-23 | | b | 2022-08-23 | | a | 2022-08-23 | | c | 2022-08-24 | | d | 2022-08-24 | +-+--+ select * from test0831 where (case when id='a' and cp='2022-08-24' then 1 else 0 end)=0; +--+--+ | test0830.id | test0830.cp | +--+--+ | a | 2022-08-23 | | c | 2022-08-23 | | d | 2022-08-23 | +--+--+{code} was: {code:java} create table test0831 (id string, cp string) stored as orc; insert into test0831 values ('a', '2022-08-23'),('c', '2022-08-24'),('d', '2022-08-244'); insert into test0831 values ('a', '2022-08-24'),('b', '2022-08-24'); select * from test0831; +-+--+ | test0831.id | test0831.cp | +-+--+ | a | 2022-08-23 | | b | 2022-08-23 | | a | 2022-08-23 | | c | 2022-08-24 | | d | 2022-08-24 | +-+--+ select * from test0831 where (case when id='a' and cp='2022-08-24' then 1 else 0 end)=0; +--+--+ | test0830.id | test0830.cp | +--+--+ | a | 2022-08-23 | | c | 2022-08-23 | | d | 2022-08-23 | +--+--+{code} > Case When Some result data is lost when there are common column conditions > and partitioned column conditions > - > > Key: HIVE-26505 > URL: https://issues.apache.org/jira/browse/HIVE-26505 > Project: Hive > Issue Type: Bug > Components: Hive >Affects Versions: 3.1.0, 4.0.0-alpha-1 >Reporter: GuangMing Lu >Priority: Critical > > {code:java} > create table test0831 (id string) partitioned by (cp string); > insert into test0831 values ('a', '2022-08-23'),('c', '2022-08-24'),('d', > '2022-08-244'); > insert into test0831 values ('a', '2022-08-24'),('b', '2022-08-24'); > select * from test0831; > +-+--+ > | 
test0831.id | test0831.cp | > +-+--+ > | a | 2022-08-23 | > | b | 2022-08-23 | > | a | 2022-08-23 | > | c | 2022-08-24 | > | d | 2022-08-24 | > +-+--+ > select * from test0831 where (case when id='a' and cp='2022-08-24' then 1 > else 0 end)=0; > +--+--+ > | test0830.id | test0830.cp | > +--+--+ > | a | 2022-08-23 | > | c | 2022-08-23 | > | d | 2022-08-23 | > +--+--+{code} > -- This message was sent by Atlassian Jira (v8.20.10#820010)
[jira] [Updated] (HIVE-26505) Case When Some result data is lost when there are common column conditions and partitioned column conditions
[ https://issues.apache.org/jira/browse/HIVE-26505?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] GuangMing Lu updated HIVE-26505: Description: {code:java} create table test0831 (id string, cp string) stored as orc; insert into test0831 values ('a', '2022-08-23'),('c', '2022-08-24'),('d', '2022-08-244'); insert into test0831 values ('a', '2022-08-24'),('b', '2022-08-24'); select * from test0831; +-+--+ | test0831.id | test0831.cp | +-+--+ | a | 2022-08-23 | | b | 2022-08-23 | | a | 2022-08-23 | | c | 2022-08-24 | | d | 2022-08-24 | +-+--+ select * from test0831 where (case when id='a' and cp='2022-08-24' then 1 else 0 end)=0; +--+--+ | test0830.id | test0830.cp | +--+--+ | a | 2022-08-23 | | c | 2022-08-23 | | d | 2022-08-23 | +--+--+{code} was: insert into test0831 values ('a', '2022-08-23'),('c', '2022-08-24'),('d', '2022-08-244'); insert into test0831 values ('a', '2022-08-24'),('b', '2022-08-24'); select * from test0831; +-+--+ | test0831.id | test0831.cp | +-+--+ | a | 2022-08-23 | | b | 2022-08-23 | | a | 2022-08-23 | | c | 2022-08-24 | | d | 2022-08-24 | +-+--+ select * from test0831 where (case when id='a' and cp='2022-08-24' then 1 else 0 end)=0; +--+--+ | test0830.id | test0830.cp | +--+--+ | a | 2022-08-23 | | c | 2022-08-23 | | d | 2022-08-23 | +--+--+ > Case When Some result data is lost when there are common column conditions > and partitioned column conditions > - > > Key: HIVE-26505 > URL: https://issues.apache.org/jira/browse/HIVE-26505 > Project: Hive > Issue Type: Bug > Components: Hive >Affects Versions: 3.1.0, 4.0.0-alpha-1 >Reporter: GuangMing Lu >Priority: Major > > {code:java} > create table test0831 (id string, cp string) stored as orc; > insert into test0831 values ('a', '2022-08-23'),('c', '2022-08-24'),('d', > '2022-08-244'); > insert into test0831 values ('a', '2022-08-24'),('b', '2022-08-24'); > select * from test0831; > +-+--+ > | test0831.id | test0831.cp | > +-+--+ > | a | 2022-08-23 | > | b | 2022-08-23 | > | a 
| 2022-08-23 | > | c | 2022-08-24 | > | d | 2022-08-24 | > +-+--+ > select * from test0831 where (case when id='a' and cp='2022-08-24' then 1 > else 0 end)=0; > +--+--+ > | test0830.id | test0830.cp | > +--+--+ > | a | 2022-08-23 | > | c | 2022-08-23 | > | d | 2022-08-23 | > +--+--+{code} > -- This message was sent by Atlassian Jira (v8.20.10#820010)
[jira] [Updated] (HIVE-26505) Case When Some result data is lost when there are common column conditions and partitioned column conditions
[ https://issues.apache.org/jira/browse/HIVE-26505?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] GuangMing Lu updated HIVE-26505: Priority: Critical (was: Major) > Case When Some result data is lost when there are common column conditions > and partitioned column conditions > - > > Key: HIVE-26505 > URL: https://issues.apache.org/jira/browse/HIVE-26505 > Project: Hive > Issue Type: Bug > Components: Hive >Affects Versions: 3.1.0, 4.0.0-alpha-1 >Reporter: GuangMing Lu >Priority: Critical > > {code:java} > create table test0831 (id string, cp string) stored as orc; > insert into test0831 values ('a', '2022-08-23'),('c', '2022-08-24'),('d', > '2022-08-244'); > insert into test0831 values ('a', '2022-08-24'),('b', '2022-08-24'); > select * from test0831; > +-+--+ > | test0831.id | test0831.cp | > +-+--+ > | a | 2022-08-23 | > | b | 2022-08-23 | > | a | 2022-08-23 | > | c | 2022-08-24 | > | d | 2022-08-24 | > +-+--+ > select * from test0831 where (case when id='a' and cp='2022-08-24' then 1 > else 0 end)=0; > +--+--+ > | test0830.id | test0830.cp | > +--+--+ > | a | 2022-08-23 | > | c | 2022-08-23 | > | d | 2022-08-23 | > +--+--+{code} > -- This message was sent by Atlassian Jira (v8.20.10#820010)
[jira] [Commented] (HIVE-26342) About EOL schedule discussion
[ https://issues.apache.org/jira/browse/HIVE-26342?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17556765#comment-17556765 ] GuangMing Lu commented on HIVE-26342: - h4. Hi [Stamatis Zampetakis,|https://issues.apache.org/jira/secure/ViewProfile.jspa?name=zabetak] Ok, I will continue to participate and discuss together > About EOL schedule discussion > - > > Key: HIVE-26342 > URL: https://issues.apache.org/jira/browse/HIVE-26342 > Project: Hive > Issue Type: Task >Affects Versions: All Versions >Reporter: GuangMing Lu >Assignee: Aihua Xu >Priority: Major > > h4. Hi [Aihua > Xu|https://issues.apache.org/jira/secure/ViewProfile.jspa?name=aihuaxu] , > [Stamatis > Zampetakis|https://issues.apache.org/jira/secure/ViewProfile.jspa?name=zabetak] > h4. We don't have a EOL schedule for every version at present, can we discuss > making one? -- This message was sent by Atlassian Jira (v8.20.7#820007)
[jira] [Updated] (HIVE-26342) About EOL schedule discussion
[ https://issues.apache.org/jira/browse/HIVE-26342?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] GuangMing Lu updated HIVE-26342: Description: h4. Hi [Aihua Xu|https://issues.apache.org/jira/secure/ViewProfile.jspa?name=aihuaxu] , [Stamatis Zampetakis|https://issues.apache.org/jira/secure/ViewProfile.jspa?name=zabetak] h4. We don't have a EOL schedule for every version at present, can we discuss making one? was: h4. Hi [Aihua Xu|https://issues.apache.org/jira/secure/ViewProfile.jspa?name=aihuaxu] and [Harish JP|https://issues.apache.org/jira/secure/ViewProfile.jspa?name=harishjp] h4. We don't have a EOL schedule for every version at present, can we discuss making one? > About EOL schedule discussion > - > > Key: HIVE-26342 > URL: https://issues.apache.org/jira/browse/HIVE-26342 > Project: Hive > Issue Type: Task >Affects Versions: All Versions >Reporter: GuangMing Lu >Assignee: Aihua Xu >Priority: Major > > h4. Hi [Aihua > Xu|https://issues.apache.org/jira/secure/ViewProfile.jspa?name=aihuaxu] , > [Stamatis > Zampetakis|https://issues.apache.org/jira/secure/ViewProfile.jspa?name=zabetak] > h4. We don't have a EOL schedule for every version at present, can we discuss > making one? -- This message was sent by Atlassian Jira (v8.20.7#820007)
[jira] [Updated] (HIVE-26342) About EOL schedule discussion
[ https://issues.apache.org/jira/browse/HIVE-26342?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] GuangMing Lu updated HIVE-26342: Issue Type: Task (was: Bug) > About EOL schedule discussion > - > > Key: HIVE-26342 > URL: https://issues.apache.org/jira/browse/HIVE-26342 > Project: Hive > Issue Type: Task >Affects Versions: All Versions >Reporter: GuangMing Lu >Assignee: Aihua Xu >Priority: Major > > h4. Hi [Aihua > Xu|https://issues.apache.org/jira/secure/ViewProfile.jspa?name=aihuaxu] and > [Harish > JP|https://issues.apache.org/jira/secure/ViewProfile.jspa?name=harishjp] > h4. We don't have a EOL schedule for every version at present, can we discuss > making one? -- This message was sent by Atlassian Jira (v8.20.7#820007)
[jira] [Commented] (HIVE-26342) About EOL schedule discussion
[ https://issues.apache.org/jira/browse/HIVE-26342?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17556325#comment-17556325 ] GuangMing Lu commented on HIVE-26342: - * Related discussion posts https://lists.apache.org/thread/sxcrcf4v9j630tl9domp0bn4m33bdq0s > About EOL schedule discussion > - > > Key: HIVE-26342 > URL: https://issues.apache.org/jira/browse/HIVE-26342 > Project: Hive > Issue Type: Bug >Affects Versions: All Versions >Reporter: GuangMing Lu >Assignee: Aihua Xu >Priority: Major > > h4. Hi [Aihua > Xu|https://issues.apache.org/jira/secure/ViewProfile.jspa?name=aihuaxu] and > [Harish > JP|https://issues.apache.org/jira/secure/ViewProfile.jspa?name=harishjp] > h4. We don't have a EOL schedule for every version at present, can we discuss > making one? -- This message was sent by Atlassian Jira (v8.20.7#820007)
[jira] [Updated] (HIVE-26342) About EOL schedule discussion
[ https://issues.apache.org/jira/browse/HIVE-26342?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] GuangMing Lu updated HIVE-26342: Affects Version/s: All Versions > About EOL schedule discussion > - > > Key: HIVE-26342 > URL: https://issues.apache.org/jira/browse/HIVE-26342 > Project: Hive > Issue Type: Bug >Affects Versions: All Versions >Reporter: GuangMing Lu >Assignee: Aihua Xu >Priority: Major > > h4. Hi [Aihua > Xu|https://issues.apache.org/jira/secure/ViewProfile.jspa?name=aihuaxu] and > [Harish > JP|https://issues.apache.org/jira/secure/ViewProfile.jspa?name=harishjp] > h4. We don't have a EOL schedule for every version at present, can we discuss > making one? -- This message was sent by Atlassian Jira (v8.20.7#820007)
[jira] [Assigned] (HIVE-26342) About EOL schedule discussion
[ https://issues.apache.org/jira/browse/HIVE-26342?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] GuangMing Lu reassigned HIVE-26342: --- Assignee: Aihua Xu > About EOL schedule discussion > - > > Key: HIVE-26342 > URL: https://issues.apache.org/jira/browse/HIVE-26342 > Project: Hive > Issue Type: Bug >Reporter: GuangMing Lu >Assignee: Aihua Xu >Priority: Major > > h4. Hi [Aihua > Xu|https://issues.apache.org/jira/secure/ViewProfile.jspa?name=aihuaxu] and > [Harish > JP|https://issues.apache.org/jira/secure/ViewProfile.jspa?name=harishjp] > h4. We don't have a EOL schedule for every version at present, can we discuss > making one? -- This message was sent by Atlassian Jira (v8.20.7#820007)
[jira] [Commented] (HIVE-20607) TxnHandler should use PreparedStatement to execute direct SQL queries.
[ https://issues.apache.org/jira/browse/HIVE-20607?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=1745#comment-1745 ] GuangMing Lu commented on HIVE-20607: - Hi [~sankarh] [~kgyrtkirk], Do you know Hive's EOL schedule? > TxnHandler should use PreparedStatement to execute direct SQL queries. > -- > > Key: HIVE-20607 > URL: https://issues.apache.org/jira/browse/HIVE-20607 > Project: Hive > Issue Type: Bug > Components: Standalone Metastore, Transactions >Affects Versions: 3.1.0, 4.0.0 >Reporter: Sankar Hariappan >Assignee: Sankar Hariappan >Priority: Major > Labels: ACID, pull-request-available > Fix For: 3.2.0, 4.0.0, 4.0.0-alpha-1 > > Attachments: HIVE-20607.01-branch-3.patch, HIVE-20607.01.patch > > > TxnHandler uses direct SQL queries to operate on Txn related databases/tables > in Hive metastore RDBMS. > Most of the methods are direct calls from Metastore api which > directly append input string arguments to the SQL string. > Need to use parameterised PreparedStatement object to set these arguments. -- This message was sent by Atlassian Jira (v8.20.7#820007)
[jira] [Assigned] (HIVE-25853) Security Vulnerability CVE-2021-44832 log4j2 need upgrade to 2.17.1
[ https://issues.apache.org/jira/browse/HIVE-25853?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] GuangMing Lu reassigned HIVE-25853: --- Assignee: GuangMing Lu > Security Vulnerability CVE-2021-44832 log4j2 need upgrade to 2.17.1 > --- > > Key: HIVE-25853 > URL: https://issues.apache.org/jira/browse/HIVE-25853 > Project: Hive > Issue Type: Bug >Affects Versions: 4.0.0 >Reporter: GuangMing Lu >Assignee: GuangMing Lu >Priority: Major > Fix For: 4.0.0 > > > Security Vulnerability CVE-2021-44832 log4j2 need upgrade to 2.17.1 -- This message was sent by Atlassian Jira (v8.20.1#820001)
[jira] [Updated] (HIVE-26018) The result of UNIQUEJOIN on Hive on Tez is inconsistent with that of MR
[ https://issues.apache.org/jira/browse/HIVE-26018?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] GuangMing Lu updated HIVE-26018: Attachment: (was: image-2022-03-09-21-08-17-835.png) > The result of UNIQUEJOIN on Hive on Tez is inconsistent with that of MR > --- > > Key: HIVE-26018 > URL: https://issues.apache.org/jira/browse/HIVE-26018 > Project: Hive > Issue Type: Bug > Components: Tez >Affects Versions: 3.1.0, 4.0.0 >Reporter: GuangMing Lu >Priority: Major > > The result of UNIQUEJOIN on Hive on Tez is inconsistent with that of MR, and > the result Is not correct, for example: > CREATE TABLE T1_n1x(key STRING, val STRING) STORED AS orc; > CREATE TABLE T2_n1x(key STRING, val STRING) STORED AS orc; > insert into T1_n1x values('aaa', '111'),('bbb', '222'),('ccc', '333'); > insert into T2_n1x values('aaa', '111'),('ddd', '444'),('ccc', '333'); > SELECT a.key, b.key FROM UNIQUEJOIN PRESERVE T1_n1x a (a.key), PRESERVE > T2_n1x b (b.key); > Hive on Tez result: wrong > |a.key |b.key | > |aaa |aaa | > |bbb |NULL | > |ccc |ccc | > |NULL |ddd | > +--+ > Hive on MR result: right > |a.key |b.key | > |aaa |aaa | > |bbb |NULL | > |ccc |ccc | > +-+ > SELECT a.key, b.key FROM UNIQUEJOIN T1_n1x a (a.key), T2_n1x b (b.key); > Hive on Tez result: wrong > +---+ > |a.key |b.key | > |aaa |aaa | > |bbb |NULL | > |ccc |ccc | > |NULL |ddd | > +-+ > Hive on MR result: right > |a.key |b.key | > |aaa |aaa | > |ccc |ccc | > > -- This message was sent by Atlassian Jira (v8.20.1#820001)
[jira] [Updated] (HIVE-26018) The result of UNIQUEJOIN on Hive on Tez is inconsistent with that of MR
[ https://issues.apache.org/jira/browse/HIVE-26018?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] GuangMing Lu updated HIVE-26018: Description: The result of UNIQUEJOIN on Hive on Tez is inconsistent with that of MR, and the result Is not correct, for example: CREATE TABLE T1_n1x(key STRING, val STRING) STORED AS orc; CREATE TABLE T2_n1x(key STRING, val STRING) STORED AS orc; insert into T1_n1x values('aaa', '111'),('bbb', '222'),('ccc', '333'); insert into T2_n1x values('aaa', '111'),('ddd', '444'),('ccc', '333'); SELECT a.key, b.key FROM UNIQUEJOIN PRESERVE T1_n1x a (a.key), PRESERVE T2_n1x b (b.key); Hive on Tez result: wrong |a.key |b.key | |aaa |aaa | |bbb |NULL | |ccc |ccc | |NULL |ddd | +--+ Hive on MR result: right |a.key |b.key | |aaa |aaa | |bbb |NULL | |ccc |ccc | +-+ SELECT a.key, b.key FROM UNIQUEJOIN T1_n1x a (a.key), T2_n1x b (b.key); Hive on Tez result: wrong +---+ |a.key |b.key | |aaa |aaa | |bbb |NULL | |ccc |ccc | |NULL |ddd | +-+ Hive on MR result: right |a.key |b.key | |aaa |aaa | |ccc |ccc | was: The result of UNIQUEJOIN on Hive on Tez is inconsistent with that of MR, and the result Is not correct, for example: CREATE TABLE T1_n1x(key STRING, val STRING) STORED AS orc; CREATE TABLE T2_n1x(key STRING, val STRING) STORED AS orc; insert into T1_n1x values('aaa', '111'),('bbb', '222'),('ccc', '333'); insert into T2_n1x values('aaa', '111'),('ddd', '444'),('ccc', '333'); SELECT a.key, b.key FROM UNIQUEJOIN PRESERVE T1_n1x a (a.key), PRESERVE T2_n1x b (b.key); Hive on Tez result: wrong {+}---{-}{-}{+}---+ |a.key |b.key | |aaa |aaa | |bbb |NULL | |ccc |ccc | |NULL |ddd | {+}---{-}{-}{+}---+ Hive on MR result: right {+}---{-}{-}{+}---+ |a.key |b.key | |aaa |aaa | |bbb |NULL | |ccc |ccc | {+}---{-}{-}{+}---+ SELECT a.key, b.key FROM UNIQUEJOIN T1_n1x a (a.key), T2_n1x b (b.key); Hive on Tez result: wrong {+}---{-}{-}{+}---+ |a.key |b.key | {+}---{-}{-}{+}---+ |aaa |aaa | |bbb |NULL | |ccc |ccc | |NULL |ddd | {+}---{-}{-}{+}---+ 
Hive on MR result: right {+}---{-}{-}{+}---+ |a.key |b.key | {+}---{-}{-}{+}---+ |aaa |aaa | |ccc |ccc | {+}---{-}{-}{+}---+ > The result of UNIQUEJOIN on Hive on Tez is inconsistent with that of MR > --- > > Key: HIVE-26018 > URL: https://issues.apache.org/jira/browse/HIVE-26018 > Project: Hive > Issue Type: Bug > Components: Tez >Affects Versions: 3.1.0, 4.0.0 >Reporter: GuangMing Lu >Priority: Major > Attachments: image-2022-03-09-21-08-17-835.png > > > The result of UNIQUEJOIN on Hive on Tez is inconsistent with that of MR, and > the result Is not correct, for example: > CREATE TABLE T1_n1x(key STRING, val STRING) STORED AS orc; > CREATE TABLE T2_n1x(key STRING, val STRING) STORED AS orc; > insert into T1_n1x values('aaa', '111'),('bbb', '222'),('ccc', '333'); > insert into T2_n1x values('aaa', '111'),('ddd', '444'),('ccc', '333'); > SELECT a.key, b.key FROM UNIQUEJOIN PRESERVE T1_n1x a (a.key), PRESERVE > T2_n1x b (b.key); > Hive on Tez result: wrong > |a.key |b.key | > |aaa |aaa | > |bbb |NULL | > |ccc |ccc | > |NULL |ddd | > +--+ > Hive on MR result: right > |a.key |b.key | > |aaa |aaa | > |bbb |NULL | > |ccc |ccc | > +-+ > SELECT a.key, b.key FROM UNIQUEJOIN T1_n1x a (a.key), T2_n1x b (b.key); > Hive on Tez result: wrong > +---+ > |a.key |b.key | > |aaa |aaa | > |bbb |NULL | > |ccc |ccc | > |NULL |ddd | > +-+ > Hive on MR result: right > |a.key |b.key | > |aaa |aaa | > |ccc |ccc | > > -- This message was sent by Atlassian Jira (v8.20.1#820001)
[jira] [Updated] (HIVE-26018) The result of UNIQUEJOIN on Hive on Tez is inconsistent with that of MR
[ https://issues.apache.org/jira/browse/HIVE-26018?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] GuangMing Lu updated HIVE-26018: Description: The result of UNIQUEJOIN on Hive on Tez is inconsistent with that of MR, and the result Is not correct, for example: CREATE TABLE T1_n1x(key STRING, val STRING) STORED AS orc; CREATE TABLE T2_n1x(key STRING, val STRING) STORED AS orc; insert into T1_n1x values('aaa', '111'),('bbb', '222'),('ccc', '333'); insert into T2_n1x values('aaa', '111'),('ddd', '444'),('ccc', '333'); SELECT a.key, b.key FROM UNIQUEJOIN PRESERVE T1_n1x a (a.key), PRESERVE T2_n1x b (b.key); Hive on Tez result: wrong {+}---{-}{-}{+}---+ |a.key |b.key | |aaa |aaa | |bbb |NULL | |ccc |ccc | |NULL |ddd | {+}---{-}{-}{+}---+ Hive on MR result: right {+}---{-}{-}{+}---+ |a.key |b.key | |aaa |aaa | |bbb |NULL | |ccc |ccc | {+}---{-}{-}{+}---+ SELECT a.key, b.key FROM UNIQUEJOIN T1_n1x a (a.key), T2_n1x b (b.key); Hive on Tez result: wrong {+}---{-}{-}{+}---+ |a.key |b.key | {+}---{-}{-}{+}---+ |aaa |aaa | |bbb |NULL | |ccc |ccc | |NULL |ddd | {+}---{-}{-}{+}---+ Hive on MR result: right {+}---{-}{-}{+}---+ |a.key |b.key | {+}---{-}{-}{+}---+ |aaa |aaa | |ccc |ccc | {+}---{-}{-}{+}---+ was: The result of UNIQUEJOIN on Hive on Tez is inconsistent with that of MR, and the result Is not correct, for example: CREATE TABLE T1_n1x(key STRING, val STRING) STORED AS orc; CREATE TABLE T2_n1x(key STRING, val STRING) STORED AS orc; insert into T1_n1x values('aaa', '111'),('bbb', '222'),('ccc', '333'); insert into T2_n1x values('aaa', '111'),('ddd', '444'),('ccc', '333'); SELECT a.key, b.key FROM UNIQUEJOIN PRESERVE T1_n1x a (a.key), PRESERVE T2_n1x b (b.key); Hive on Tez result: wrong +++ | a.key | b.key | +++ | aaa | aaa | | bbb | NULL | | ccc | ccc | | NULL | ddd | +++ Hive on MR result: right +++ | a.key | b.key | +++ | aaa | aaa | | bbb | NULL | | ccc | ccc | +++ SELECT a.key, b.key FROM UNIQUEJOIN T1_n1x a (a.key), T2_n1x b (b.key); Hive on Tez 
result: wrong +++ | a.key | b.key | +++ | aaa | aaa | | bbb | NULL | | ccc | ccc | | NULL | ddd | +++ Hive on MR result: right +++ | a.key | b.key | +++ | aaa | aaa | | ccc | ccc | +++ > The result of UNIQUEJOIN on Hive on Tez is inconsistent with that of MR > --- > > Key: HIVE-26018 > URL: https://issues.apache.org/jira/browse/HIVE-26018 > Project: Hive > Issue Type: Bug > Components: Tez >Affects Versions: 3.1.0, 4.0.0 >Reporter: GuangMing Lu >Priority: Major > Attachments: image-2022-03-09-21-08-17-835.png > > > The result of UNIQUEJOIN on Hive on Tez is inconsistent with that of MR, and > the result Is not correct, for example: > CREATE TABLE T1_n1x(key STRING, val STRING) STORED AS orc; > CREATE TABLE T2_n1x(key STRING, val STRING) STORED AS orc; > insert into T1_n1x values('aaa', '111'),('bbb', '222'),('ccc', '333'); > insert into T2_n1x values('aaa', '111'),('ddd', '444'),('ccc', '333'); > SELECT a.key, b.key FROM UNIQUEJOIN PRESERVE T1_n1x a (a.key), PRESERVE > T2_n1x b (b.key); > Hive on Tez result: wrong > {+}---{-}{-}{+}---+ > |a.key |b.key | > |aaa |aaa | > |bbb |NULL | > |ccc |ccc | > |NULL |ddd | > {+}---{-}{-}{+}---+ > Hive on MR result: right > {+}---{-}{-}{+}---+ > |a.key |b.key | > > |aaa |aaa | > |bbb |NULL | > |ccc |ccc | > {+}---{-}{-}{+}---+ > SELECT a.key, b.key FROM UNIQUEJOIN T1_n1x a (a.key), T2_n1x b (b.key); > Hive on Tez result: wrong > {+}---{-}{-}{+}---+ > |a.key |b.key | > {+}---{-}{-}{+}---+ > |aaa |aaa | > |bbb |NULL | > |ccc |ccc | > |NULL |ddd | > {+}---{-}{-}{+}---+ > Hive on MR result: right > {+}---{-}{-}{+}---+ > |a.key |b.key | > {+}---{-}{-}{+}---+ > |aaa |aaa | > |ccc |ccc | > {+}---{-}{-}{+}---+ > -- This message was sent by Atlassian Jira (v8.20.1#820001)
[jira] [Updated] (HIVE-25853) Security Vulnerability CVE-2021-44832 log4j2 need upgrade to 2.17.1
[ https://issues.apache.org/jira/browse/HIVE-25853?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] GuangMing Lu updated HIVE-25853: Issue Type: Bug (was: Improvement) > Security Vulnerability CVE-2021-44832 log4j2 need upgrade to 2.17.1 > --- > > Key: HIVE-25853 > URL: https://issues.apache.org/jira/browse/HIVE-25853 > Project: Hive > Issue Type: Bug >Affects Versions: 4.0.0 >Reporter: GuangMing Lu >Priority: Major > > Security Vulnerability CVE-2021-44832 log4j2 need upgrade to 2.17.1 -- This message was sent by Atlassian Jira (v8.20.1#820001)
[jira] [Updated] (HIVE-25853) Security Vulnerability CVE-2021-44832 log4j2 need upgrade to 2.17.1
[ https://issues.apache.org/jira/browse/HIVE-25853?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] GuangMing Lu updated HIVE-25853: Summary: Security Vulnerability CVE-2021-44832 log4j2 need upgrade to 2.17.1 (was: Security Vulnerability CVE-2021-44832 log4j2 upgrade to 2.17.1) > Security Vulnerability CVE-2021-44832 log4j2 need upgrade to 2.17.1 > --- > > Key: HIVE-25853 > URL: https://issues.apache.org/jira/browse/HIVE-25853 > Project: Hive > Issue Type: Improvement >Affects Versions: 4.0.0 >Reporter: GuangMing Lu >Priority: Major > > Security Vulnerability CVE-2021-44832 log4j2 need upgrade to 2.17.1 -- This message was sent by Atlassian Jira (v8.20.1#820001)
[jira] [Updated] (HIVE-25853) Security Vulnerability CVE-2021-44832 log4j2 upgrade to 2.17.1
[ https://issues.apache.org/jira/browse/HIVE-25853?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] GuangMing Lu updated HIVE-25853: Description: Security Vulnerability CVE-2021-44832 log4j2 need upgrade to 2.17.1 (was: Security Vulnerability CVE-2021-44832 log4j2 upgrade to 2.17.1) > Security Vulnerability CVE-2021-44832 log4j2 upgrade to 2.17.1 > -- > > Key: HIVE-25853 > URL: https://issues.apache.org/jira/browse/HIVE-25853 > Project: Hive > Issue Type: Improvement >Affects Versions: 4.0.0 >Reporter: GuangMing Lu >Priority: Major > > Security Vulnerability CVE-2021-44832 log4j2 need upgrade to 2.17.1 -- This message was sent by Atlassian Jira (v8.20.1#820001)
[jira] [Updated] (HIVE-25525) TestRetryingThriftCLIServiceClient test case optimization
[ https://issues.apache.org/jira/browse/HIVE-25525?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] GuangMing Lu updated HIVE-25525: Description: {code:java} //devil's Numbers cliServiceClient.openSession("anonymous", "anonymous"); client.openSession("anonymous", "anonymous"); {code} The client.openSession invocations initialize the user name and password with magic numbers (hard-coded literals) was:Client.opensession invoke initializes the user name and password for the devil's Numbers > TestRetryingThriftCLIServiceClient test case optimization > - > > Key: HIVE-25525 > URL: https://issues.apache.org/jira/browse/HIVE-25525 > Project: Hive > Issue Type: Improvement > Components: Tests >Affects Versions: 3.1.0, 3.1.2 >Reporter: GuangMing Lu >Priority: Major > > {code:java} > //devil's Numbers > cliServiceClient.openSession("anonymous", "anonymous"); > client.openSession("anonymous", "anonymous"); > {code} > The client.openSession invocations initialize the user name and password with > magic numbers (hard-coded literals) -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Commented] (HIVE-20682) Async query execution can potentially fail if shared sessionHive is closed by master thread.
[ https://issues.apache.org/jira/browse/HIVE-20682?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17408892#comment-17408892 ] GuangMing Lu commented on HIVE-20682: - Hivs 3.1.0 does not match the current code, which is not shown in version 3.1.0 > Async query execution can potentially fail if shared sessionHive is closed by > master thread. > > > Key: HIVE-20682 > URL: https://issues.apache.org/jira/browse/HIVE-20682 > Project: Hive > Issue Type: Bug > Components: HiveServer2 >Affects Versions: 4.0.0 >Reporter: Sankar Hariappan >Assignee: Sankar Hariappan >Priority: Major > Labels: pull-request-available > Fix For: 4.0.0 > > Attachments: HIVE-20682.01.patch, HIVE-20682.02.patch, > HIVE-20682.03.patch, HIVE-20682.04.patch, HIVE-20682.05.patch, > HIVE-20682.06.patch > > > *Problem description:* > The master thread initializes the *sessionHive* object in *HiveSessionImpl* > class when we open a new session for a client connection and by default all > queries from this connection shares the same sessionHive object. > If the master thread executes a *synchronous* query, it closes the > sessionHive object (referred via thread local hiveDb) if > {{Hive.isCompatible}} returns false and sets new Hive object in thread local > HiveDb but doesn't change the sessionHive object in the session. Whereas, > *asynchronous* query execution via async threads never closes the sessionHive > object and it just creates a new one if needed and sets it as their thread > local hiveDb. > So, the problem can happen in the case where an *asynchronous* query is being > executed by async threads refers to sessionHive object and the master thread > receives a *synchronous* query that closes the same sessionHive object. > Also, each query execution overwrites the thread local hiveDb object to > sessionHive object which potentially leaks a metastore connection if the > previous synchronous query execution re-created the Hive object. 
> *Possible Fix:* > The *sessionHive* object could be shared by multiple threads and so it > shouldn't be allowed to be closed by any query execution threads when they > re-create the Hive object due to changes in Hive configurations. But the Hive > objects created by query execution threads should be closed when the thread > exits. > So, it is proposed to have an *isAllowClose* flag (default: *true*) in Hive > object which should be set to *false* for *sessionHive* and would be > forcefully closed when the session is closed or released. > Also, when we reset *sessionHive* object with new one due to changes in > *sessionConf*, the old one should be closed when no async thread is referring > to it. This can be done using "*finalize*" method of Hive object where we can > close HMS connection when Hive object is garbage collected. > cc [~pvary] -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Updated] (HIVE-20682) Async query execution can potentially fail if shared sessionHive is closed by master thread.
[ https://issues.apache.org/jira/browse/HIVE-20682?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] GuangMing Lu updated HIVE-20682: Affects Version/s: (was: 3.1.0) > Async query execution can potentially fail if shared sessionHive is closed by > master thread. > > > Key: HIVE-20682 > URL: https://issues.apache.org/jira/browse/HIVE-20682 > Project: Hive > Issue Type: Bug > Components: HiveServer2 >Affects Versions: 4.0.0 >Reporter: Sankar Hariappan >Assignee: Sankar Hariappan >Priority: Major > Labels: pull-request-available > Fix For: 4.0.0 > > Attachments: HIVE-20682.01.patch, HIVE-20682.02.patch, > HIVE-20682.03.patch, HIVE-20682.04.patch, HIVE-20682.05.patch, > HIVE-20682.06.patch > > > *Problem description:* > The master thread initializes the *sessionHive* object in *HiveSessionImpl* > class when we open a new session for a client connection and by default all > queries from this connection shares the same sessionHive object. > If the master thread executes a *synchronous* query, it closes the > sessionHive object (referred via thread local hiveDb) if > {{Hive.isCompatible}} returns false and sets new Hive object in thread local > HiveDb but doesn't change the sessionHive object in the session. Whereas, > *asynchronous* query execution via async threads never closes the sessionHive > object and it just creates a new one if needed and sets it as their thread > local hiveDb. > So, the problem can happen in the case where an *asynchronous* query is being > executed by async threads refers to sessionHive object and the master thread > receives a *synchronous* query that closes the same sessionHive object. > Also, each query execution overwrites the thread local hiveDb object to > sessionHive object which potentially leaks a metastore connection if the > previous synchronous query execution re-created the Hive object. 
> *Possible Fix:* > The *sessionHive* object could be shared by multiple threads and so it > shouldn't be allowed to be closed by any query execution threads when they > re-create the Hive object due to changes in Hive configurations. But the Hive > objects created by query execution threads should be closed when the thread > exits. > So, it is proposed to have an *isAllowClose* flag (default: *true*) in Hive > object which should be set to *false* for *sessionHive* and would be > forcefully closed when the session is closed or released. > Also, when we reset *sessionHive* object with new one due to changes in > *sessionConf*, the old one should be closed when no async thread is referring > to it. This can be done using "*finalize*" method of Hive object where we can > close HMS connection when Hive object is garbage collected. > cc [~pvary] -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Commented] (HIVE-20828) Upgrade to Spark 2.4.0
[ https://issues.apache.org/jira/browse/HIVE-20828?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17408106#comment-17408106 ] GuangMing Lu commented on HIVE-20828: - Hi [~stakiar], How is the hive on spark evolving? > Upgrade to Spark 2.4.0 > -- > > Key: HIVE-20828 > URL: https://issues.apache.org/jira/browse/HIVE-20828 > Project: Hive > Issue Type: Improvement > Components: Spark >Reporter: Sahil Takiar >Priority: Major > Attachments: HIVE-20828.1.patch, HIVE-20828.2.patch > > > The Spark community is in the process of releasing Spark 2.4.0. We should do > some testing with the RC candidates and then upgrade once the release is > finalized. -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Updated] (HIVE-25315) SQL executed hiveserver is killed by the HiveServer2-Handler-Pool thread, very occasionally
[ https://issues.apache.org/jira/browse/HIVE-25315?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] GuangMing Lu updated HIVE-25315: Component/s: HiveServer2 > SQL executed hiveserver is killed by the HiveServer2-Handler-Pool thread, > very occasionally > --- > > Key: HIVE-25315 > URL: https://issues.apache.org/jira/browse/HIVE-25315 > Project: Hive > Issue Type: Bug > Components: HiveServer2 >Affects Versions: 3.1.0 >Reporter: GuangMing Lu >Priority: Major > > {code:java} > 2021-07-05 15:23:17,376 | INFO | HiveServer2-Handler-Pool: Thread-226765745 > | Shutting down HiveServer2 | > org.apache.hive.service.server.HiveServer2.stop(HiveServer2.java:1090) > 2021-07-05 15:23:17,376 | INFO | HiveServer2-Handler-Pool: Thread-226765745 > | Thrift server has stopped | > org.apache.hive.service.cli.thrift.ThriftBinaryCLIService.stopServer(ThriftBinaryCLIService.java:225) > 2021-07-05 15:23:17,376 | INFO | HiveServer2-Handler-Pool: Thread-226765745 > | Service:ThriftBinaryCLIService is stopped. | > org.apache.hive.service.AbstractService.stop(AbstractService.java:130) > 2021-07-05 15:23:17,376 | INFO | HiveServer2-Handler-Pool: Thread-226765745 > | Service:OperationManager is stopped. | > org.apache.hive.service.AbstractService.stop(AbstractService.java:130) > 2021-07-05 15:23:17,376 | INFO | HiveServer2-Handler-Pool: Thread-226765745 > | Service:SessionManager is stopped. | > org.apache.hive.service.AbstractService.stop(AbstractService.java:130) > {code} -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Updated] (HIVE-25269) When the skew and parallel parameters are true simultaneously, the result is less data
[ https://issues.apache.org/jira/browse/HIVE-25269?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] GuangMing Lu updated HIVE-25269: Attachment: P10IDS_RISKLIST.zip p10ids_riskcon.zip p10ids_realpayrc_ygz.zip p10ids_prerec_split_ygz.zip comb_classcode.zip > When the skew and parallel parameters are true simultaneously, the result is > less data > -- > > Key: HIVE-25269 > URL: https://issues.apache.org/jira/browse/HIVE-25269 > Project: Hive > Issue Type: Bug > Components: Physical Optimizer, SQL >Affects Versions: 3.1.0, 3.1.2 >Reporter: GuangMing Lu >Priority: Major > Attachments: P10IDS_RISKLIST.zip, comb_classcode.zip, > p10ids_prerec_split_ygz.zip, p10ids_realpayrc_ygz.zip, p10ids_riskcon.zip, > test.sql > > > When the params of hive.optimize.skewjoin, hive.groupby.skewindata and > hive.exec.parallel are true, and exec sql such as 'INSERT... FROM (SUBQUERY > UNIONALL ...GROUP BY...) A JOIN/LEFT JOIN A.expression', result data will be > reduced. Details of SQL and test data can be found in the attachment -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Updated] (HIVE-25269) When the skew and parallel parameters are true simultaneously, the result is less data
[ https://issues.apache.org/jira/browse/HIVE-25269?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] GuangMing Lu updated HIVE-25269: Attachment: test.sql > When the skew and parallel parameters are true simultaneously, the result is > less data > -- > > Key: HIVE-25269 > URL: https://issues.apache.org/jira/browse/HIVE-25269 > Project: Hive > Issue Type: Bug > Components: Physical Optimizer, SQL >Affects Versions: 3.1.0, 3.1.2 >Reporter: GuangMing Lu >Priority: Major > Attachments: test.sql > > > When the params of hive.optimize.skewjoin, hive.groupby.skewindata and > hive.exec.parallel are true, and exec sql such as 'INSERT... FROM (SUBQUERY > UNIONALL ...GROUP BY...) A JOIN/LEFT JOIN A.expression', result data will be > reduced. Details of SQL and test data can be found in the attachment -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Updated] (HIVE-25269) When the skew and parallel parameters are true simultaneously, the result is less data
[ https://issues.apache.org/jira/browse/HIVE-25269?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] GuangMing Lu updated HIVE-25269: Attachment: (was: comb_classcode.data) > When the skew and parallel parameters are true simultaneously, the result is > less data > -- > > Key: HIVE-25269 > URL: https://issues.apache.org/jira/browse/HIVE-25269 > Project: Hive > Issue Type: Bug > Components: Physical Optimizer, SQL >Affects Versions: 3.1.0, 3.1.2 >Reporter: GuangMing Lu >Priority: Major > > When the params of hive.optimize.skewjoin, hive.groupby.skewindata and > hive.exec.parallel are true, and exec sql such as 'INSERT... FROM (SUBQUERY > UNIONALL ...GROUP BY...) A JOIN/LEFT JOIN A.expression', result data will be > reduced. Details of SQL and test data can be found in the attachment -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Updated] (HIVE-25269) When the skew and parallel parameters are true simultaneously, the result is less data
[ https://issues.apache.org/jira/browse/HIVE-25269?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] GuangMing Lu updated HIVE-25269: Attachment: (was: 样例分析-表入数据.sql) > When the skew and parallel parameters are true simultaneously, the result is > less data > -- > > Key: HIVE-25269 > URL: https://issues.apache.org/jira/browse/HIVE-25269 > Project: Hive > Issue Type: Bug > Components: Physical Optimizer, SQL >Affects Versions: 3.1.0, 3.1.2 >Reporter: GuangMing Lu >Priority: Major > > When the params of hive.optimize.skewjoin, hive.groupby.skewindata and > hive.exec.parallel are true, and exec sql such as 'INSERT... FROM (SUBQUERY > UNIONALL ...GROUP BY...) A JOIN/LEFT JOIN A.expression', result data will be > reduced. Details of SQL and test data can be found in the attachment -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Updated] (HIVE-25269) When the skew and parallel parameters are true simultaneously, the result is less data
[ https://issues.apache.org/jira/browse/HIVE-25269?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] GuangMing Lu updated HIVE-25269: Attachment: 样例分析-表入数据.sql > When the skew and parallel parameters are true simultaneously, the result is > less data > -- > > Key: HIVE-25269 > URL: https://issues.apache.org/jira/browse/HIVE-25269 > Project: Hive > Issue Type: Bug > Components: Physical Optimizer, SQL >Affects Versions: 3.1.0, 3.1.2 >Reporter: GuangMing Lu >Priority: Major > Attachments: comb_classcode.data, 样例分析-表入数据.sql > > > When the params of hive.optimize.skewjoin, hive.groupby.skewindata and > hive.exec.parallel are true, and exec sql such as 'INSERT... FROM (SUBQUERY > UNIONALL ...GROUP BY...) A JOIN/LEFT JOIN A.expression', result data will be > reduced. Details of SQL and test data can be found in the attachment -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Updated] (HIVE-25269) When the skew and parallel parameters are true simultaneously, the result is less data
[ https://issues.apache.org/jira/browse/HIVE-25269?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] GuangMing Lu updated HIVE-25269: Attachment: (was: table_b_data.orc) > When the skew and parallel parameters are true simultaneously, the result is > less data > -- > > Key: HIVE-25269 > URL: https://issues.apache.org/jira/browse/HIVE-25269 > Project: Hive > Issue Type: Bug > Components: Physical Optimizer, SQL >Affects Versions: 3.1.0, 3.1.2 >Reporter: GuangMing Lu >Priority: Major > Attachments: comb_classcode.data, 样例分析-表入数据.sql > > > When the params of hive.optimize.skewjoin, hive.groupby.skewindata and > hive.exec.parallel are true, and exec sql such as 'INSERT... FROM (SUBQUERY > UNIONALL ...GROUP BY...) A JOIN/LEFT JOIN A.expression', result data will be > reduced. Details of SQL and test data can be found in the attachment -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Updated] (HIVE-25269) When the skew and parallel parameters are true simultaneously, the result is less data
[ https://issues.apache.org/jira/browse/HIVE-25269?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] GuangMing Lu updated HIVE-25269: Attachment: (was: test.sql) > When the skew and parallel parameters are true simultaneously, the result is > less data > -- > > Key: HIVE-25269 > URL: https://issues.apache.org/jira/browse/HIVE-25269 > Project: Hive > Issue Type: Bug > Components: Physical Optimizer, SQL >Affects Versions: 3.1.0, 3.1.2 >Reporter: GuangMing Lu >Priority: Major > Attachments: comb_classcode.data, 样例分析-表入数据.sql > > > When the params of hive.optimize.skewjoin, hive.groupby.skewindata and > hive.exec.parallel are true, and exec sql such as 'INSERT... FROM (SUBQUERY > UNIONALL ...GROUP BY...) A JOIN/LEFT JOIN A.expression', result data will be > reduced. Details of SQL and test data can be found in the attachment -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Updated] (HIVE-25269) When the skew and parallel parameters are true simultaneously, the result is less data
[ https://issues.apache.org/jira/browse/HIVE-25269?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] GuangMing Lu updated HIVE-25269: Attachment: (was: table_d_data.orc) > When the skew and parallel parameters are true simultaneously, the result is > less data > -- > > Key: HIVE-25269 > URL: https://issues.apache.org/jira/browse/HIVE-25269 > Project: Hive > Issue Type: Bug > Components: Physical Optimizer, SQL >Affects Versions: 3.1.0, 3.1.2 >Reporter: GuangMing Lu >Priority: Major > Attachments: comb_classcode.data, 样例分析-表入数据.sql > > > When the params of hive.optimize.skewjoin, hive.groupby.skewindata and > hive.exec.parallel are true, and exec sql such as 'INSERT... FROM (SUBQUERY > UNIONALL ...GROUP BY...) A JOIN/LEFT JOIN A.expression', result data will be > reduced. Details of SQL and test data can be found in the attachment -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Updated] (HIVE-25269) When the skew and parallel parameters are true simultaneously, the result is less data
[ https://issues.apache.org/jira/browse/HIVE-25269?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] GuangMing Lu updated HIVE-25269: Attachment: comb_classcode.data > When the skew and parallel parameters are true simultaneously, the result is > less data > -- > > Key: HIVE-25269 > URL: https://issues.apache.org/jira/browse/HIVE-25269 > Project: Hive > Issue Type: Bug > Components: Physical Optimizer, SQL >Affects Versions: 3.1.0, 3.1.2 >Reporter: GuangMing Lu >Priority: Major > Attachments: comb_classcode.data, 样例分析-表入数据.sql > > > When the params of hive.optimize.skewjoin, hive.groupby.skewindata and > hive.exec.parallel are true, and exec sql such as 'INSERT... FROM (SUBQUERY > UNIONALL ...GROUP BY...) A JOIN/LEFT JOIN A.expression', result data will be > reduced. Details of SQL and test data can be found in the attachment -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Updated] (HIVE-25269) When the skew and parallel parameters are true simultaneously, the result is less data
[ https://issues.apache.org/jira/browse/HIVE-25269?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] GuangMing Lu updated HIVE-25269: Attachment: (was: table_c_data.orc) > When the skew and parallel parameters are true simultaneously, the result is > less data > -- > > Key: HIVE-25269 > URL: https://issues.apache.org/jira/browse/HIVE-25269 > Project: Hive > Issue Type: Bug > Components: Physical Optimizer, SQL >Affects Versions: 3.1.0, 3.1.2 >Reporter: GuangMing Lu >Priority: Major > Attachments: comb_classcode.data, 样例分析-表入数据.sql > > > When the params of hive.optimize.skewjoin, hive.groupby.skewindata and > hive.exec.parallel are true, and exec sql such as 'INSERT... FROM (SUBQUERY > UNIONALL ...GROUP BY...) A JOIN/LEFT JOIN A.expression', result data will be > reduced. Details of SQL and test data can be found in the attachment -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Updated] (HIVE-25269) When the skew and parallel parameters are true simultaneously, the result is less data
[ https://issues.apache.org/jira/browse/HIVE-25269?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] GuangMing Lu updated HIVE-25269: Attachment: (was: table_a_data.orc) > When the skew and parallel parameters are true simultaneously, the result is > less data > -- > > Key: HIVE-25269 > URL: https://issues.apache.org/jira/browse/HIVE-25269 > Project: Hive > Issue Type: Bug > Components: Physical Optimizer, SQL >Affects Versions: 3.1.0, 3.1.2 >Reporter: GuangMing Lu >Priority: Major > Attachments: comb_classcode.data, 样例分析-表入数据.sql > > > When the params of hive.optimize.skewjoin, hive.groupby.skewindata and > hive.exec.parallel are true, and exec sql such as 'INSERT... FROM (SUBQUERY > UNIONALL ...GROUP BY...) A JOIN/LEFT JOIN A.expression', result data will be > reduced. Details of SQL and test data can be found in the attachment -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Commented] (HIVE-25239) Create the compression table but the properties Compressed is No
[ https://issues.apache.org/jira/browse/HIVE-25239?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17361387#comment-17361387 ] GuangMing Lu commented on HIVE-25239: - Hi [XixiHua|https://issues.apache.org/jira/secure/ViewProfile.jspa?name=honeyaya], It has not been solved; you can test it on the master branch. Compressed should be marked as the attribute value when building a table > Create the compression table but the properties Compressed is No > > > Key: HIVE-25239 > URL: https://issues.apache.org/jira/browse/HIVE-25239 > Project: Hive > Issue Type: Bug > Components: Hive >Affects Versions: 3.1.0 >Reporter: GuangMing Lu >Priority: Major > Labels: easyfix > Fix For: 4.0.0 > > Attachments: HIVE-25239.01.patch, image-2021-06-11-10-49-25-710.png > > > Create an ORC Snappy format table, call 'desc formatted table' found that > 'Compressed' is No, should need to display as YES > {quote}create database lgm; > create table lgm.test_tbl( > f1 int, > f2 string > ) stored as orc > TBLPROPERTIES("orc.compress"="snappy"); > desc formatted lgm.test_tbl; > !image-2021-06-11-10-49-25-710.png! > {quote} -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Assigned] (HIVE-25239) Create the compression table but the properties Compressed is No
[ https://issues.apache.org/jira/browse/HIVE-25239?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] GuangMing Lu reassigned HIVE-25239: --- Assignee: (was: GuangMing Lu) > Create the compression table but the properties Compressed is No > > > Key: HIVE-25239 > URL: https://issues.apache.org/jira/browse/HIVE-25239 > Project: Hive > Issue Type: Bug > Components: Hive >Affects Versions: 3.1.0 >Reporter: GuangMing Lu >Priority: Major > Labels: easyfix > Fix For: 4.0.0 > > Attachments: HIVE-25239.01.patch, image-2021-06-11-10-49-25-710.png > > > Create an ORC Snappy format table, call 'desc formatted table' found that > 'Compressed' is No, should need to display as YES > {quote}create database lgm; > create table lgm.test_tbl( > f1 int, > f2 string > ) stored as orc > TBLPROPERTIES("orc.compress"="snappy"); > desc formatted lgm.test_tbl; > !image-2021-06-11-10-49-25-710.png! > {quote} -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Updated] (HIVE-25239) Create the compression table but the properties Compressed is No
[ https://issues.apache.org/jira/browse/HIVE-25239?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] GuangMing Lu updated HIVE-25239: Attachment: HIVE-25239.01.patch Fix Version/s: 4.0.0 Assignee: GuangMing Lu Status: Patch Available (was: Open) > Create the compression table but the properties Compressed is No > > > Key: HIVE-25239 > URL: https://issues.apache.org/jira/browse/HIVE-25239 > Project: Hive > Issue Type: Bug > Components: Hive >Affects Versions: 3.1.0 >Reporter: GuangMing Lu >Assignee: GuangMing Lu >Priority: Major > Labels: easyfix > Fix For: 4.0.0 > > Attachments: HIVE-25239.01.patch, image-2021-06-11-10-49-25-710.png > > > Create an ORC Snappy format table, call 'desc formatted table' found that > 'Compressed' is No, should need to display as YES > {quote}create database lgm; > create table lgm.test_tbl( > f1 int, > f2 string > ) stored as orc > TBLPROPERTIES("orc.compress"="snappy"); > desc formatted lgm.test_tbl; > !image-2021-06-11-10-49-25-710.png! > {quote} -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Updated] (HIVE-25239) Create the compression table but the properties Compressed is No
[ https://issues.apache.org/jira/browse/HIVE-25239?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] GuangMing Lu updated HIVE-25239: Summary: Create the compression table but the properties Compressed is No (was: Create the compression table but the compressed properties are no) > Create the compression table but the properties Compressed is No > > > Key: HIVE-25239 > URL: https://issues.apache.org/jira/browse/HIVE-25239 > Project: Hive > Issue Type: Bug > Components: Hive >Affects Versions: 3.1.0 >Reporter: GuangMing Lu >Priority: Major > Labels: easyfix > Attachments: image-2021-06-11-10-49-25-710.png > > > Create an ORC Snappy format table, call 'desc formatted table' found that > 'Compressed' is No, should need to display as YES > {quote}create database lgm; > create table lgm.test_tbl( > f1 int, > f2 string > ) stored as orc > TBLPROPERTIES("orc.compress"="snappy"); > desc formatted lgm.test_tbl; > !image-2021-06-11-10-49-25-710.png! > {quote} -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Updated] (HIVE-24715) Increase bucketId range
[ https://issues.apache.org/jira/browse/HIVE-24715?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] GuangMing Lu updated HIVE-24715: Issue Type: Improvement (was: Bug) > Increase bucketId range > --- > > Key: HIVE-24715 > URL: https://issues.apache.org/jira/browse/HIVE-24715 > Project: Hive > Issue Type: Improvement > Components: HiveServer2 >Reporter: Attila Magyar >Assignee: Attila Magyar >Priority: Major > Labels: pull-request-available > Fix For: 4.0.0 > > Attachments: Bucket Id range increase.pdf > > Time Spent: 1h 20m > Remaining Estimate: 0h > -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Updated] (HIVE-22098) Data loss occurs when multiple tables are join with different bucket_version
[ https://issues.apache.org/jira/browse/HIVE-22098?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] GuangMing Lu updated HIVE-22098: Attachment: join_test.sql > Data loss occurs when multiple tables are join with different bucket_version > > > Key: HIVE-22098 > URL: https://issues.apache.org/jira/browse/HIVE-22098 > Project: Hive > Issue Type: Bug > Components: Operators >Affects Versions: 3.1.0, 3.1.2 >Reporter: GuangMing Lu >Assignee: yongtaoliao >Priority: Blocker > Labels: data-loss, wrongresults > Attachments: HIVE-22098.1.patch, image-2019-08-12-18-45-15-771.png, > join_test.sql, table_a_data.orc, table_b_data.orc, table_c_data.orc > > > When different bucketVersion of tables do join and no of reducers is greater > than 2, the result is incorrect (*data loss*). > *Scenario 1*: Three tables join. The temporary result data of table_a in the > first table and table_b in the second table joins result is recorded as > tmp_a_b, When it joins with the third table, the bucket_version=2 of the > table created by default after hive-3.0.0, temporary data tmp_a_b initialized > the bucketVerison=-1, and then ReduceSinkOperator Verketison=-1 is joined. In > the init method, the hash algorithm of selecting join column is selected > according to bucketVersion. If bucketVersion = 2 and is not an acid > operation, it will acquired the new algorithm of hash. Otherwise, the old > algorithm of hash is acquired. Because of the inconsistency of the algorithm > of hash, the partition of data allocation caused are different. At stage of > Reducer, Data with the same key can not be paired resulting in data loss. 
> *Scenario 2*: create two test tables, create table > table_bucketversion_1(col_1 string, col_2 string) TBLPROPERTIES > ('bucketing_version'='1'); create table table_bucketversion_2(col_1 string, col_2 string) > TBLPROPERTIES ('bucketing_version'='2'); > when table_bucketversion_1 is joined with table_bucketversion_2, part of the result > data will be lost because the bucketVersion is different. > -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Updated] (HIVE-22098) Data loss occurs when multiple tables are join with different bucket_version
[ https://issues.apache.org/jira/browse/HIVE-22098?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] GuangMing Lu updated HIVE-22098: Attachment: (was: join_test.sql) > Data loss occurs when multiple tables are join with different bucket_version > > > Key: HIVE-22098 > URL: https://issues.apache.org/jira/browse/HIVE-22098 > Project: Hive > Issue Type: Bug > Components: Operators >Affects Versions: 3.1.0, 3.1.2 >Reporter: GuangMing Lu >Assignee: yongtaoliao >Priority: Blocker > Labels: data-loss, wrongresults > Attachments: HIVE-22098.1.patch, image-2019-08-12-18-45-15-771.png, > join_test.sql, table_a_data.orc, table_b_data.orc, table_c_data.orc > > > When different bucketVersion of tables do join and no of reducers is greater > than 2, the result is incorrect (*data loss*). > *Scenario 1*: Three tables join. The temporary result data of table_a in the > first table and table_b in the second table joins result is recorded as > tmp_a_b, When it joins with the third table, the bucket_version=2 of the > table created by default after hive-3.0.0, temporary data tmp_a_b initialized > the bucketVerison=-1, and then ReduceSinkOperator Verketison=-1 is joined. In > the init method, the hash algorithm of selecting join column is selected > according to bucketVersion. If bucketVersion = 2 and is not an acid > operation, it will acquired the new algorithm of hash. Otherwise, the old > algorithm of hash is acquired. Because of the inconsistency of the algorithm > of hash, the partition of data allocation caused are different. At stage of > Reducer, Data with the same key can not be paired resulting in data loss. 
> *Scenario 2*: create two test tables, create table > table_bucketversion_1(col_1 string, col_2 string) TBLPROPERTIES > ('bucketing_version'='1'); create table table_bucketversion_2(col_1 string, col_2 string) > TBLPROPERTIES ('bucketing_version'='2'); > when table_bucketversion_1 is joined with table_bucketversion_2, part of the result > data will be lost because the bucketVersion is different. > -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Commented] (HIVE-24060) When the CBO is false, NPE is thrown by an EXCEPT or INTERSECT execution
[ https://issues.apache.org/jira/browse/HIVE-24060?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17199373#comment-17199373 ] GuangMing Lu commented on HIVE-24060: - Hey [~dengzh] Such is the case, but hive-1.2.1 is available, which leads to incompatibility problems for some users, whether we need to consider it > When the CBO is false, NPE is thrown by an EXCEPT or INTERSECT execution > > > Key: HIVE-24060 > URL: https://issues.apache.org/jira/browse/HIVE-24060 > Project: Hive > Issue Type: Bug > Components: CBO, Hive >Affects Versions: 3.1.0, 3.1.2 >Reporter: GuangMing Lu >Priority: Major > > {code:java} > set hive.cbo.enable=false; > create table testtable(idx string, namex string) stored as orc; > insert into testtable values('123', 'aaa'), ('234', 'bbb'); > explain select a.idx from (select idx,namex from testtable intersect select > idx,namex from testtable) a > {code} > The execution throws a NullPointException: > {code:java} > 2020-08-24 15:12:24,261 | WARN | HiveServer2-Handler-Pool: Thread-345 | > Error executing statement: | > org.apache.hive.service.cli.thrift.ThriftCLIService.executeNewStatement(ThriftCLIService.java:1155) > org.apache.hive.service.cli.HiveSQLException: Error while compiling > statement: FAILED: NullPointerException null > at > org.apache.hive.service.cli.operation.Operation.toSQLException(Operation.java:341) > ~[hive-service-3.1.0.jar:3.1.0] > at > org.apache.hive.service.cli.operation.SQLOperation.prepare(SQLOperation.java:215) > ~[hive-service-3.1.0.jar:3.1.0] > at > org.apache.hive.service.cli.operation.SQLOperation.runInternal(SQLOperation.java:316) > ~[hive-service-3.1.0.jar:3.1.0] > at > org.apache.hive.service.cli.operation.Operation.run(Operation.java:253) > ~[hive-service-3.1.0.jar:3.1.0] > at > org.apache.hive.service.cli.session.HiveSessionImpl.executeStatementInternal(HiveSessionImpl.java:684) > ~[hive-service-3.1.0.jar:3.1.0] > at > 
org.apache.hive.service.cli.session.HiveSessionImpl.executeStatementAsync(HiveSessionImpl.java:670) > ~[hive-service-3.1.0.jar:3.1.0] > at > org.apache.hive.service.cli.CLIService.executeStatementAsync(CLIService.java:342) > ~[hive-service-3.1.0.jar:3.1.0] > at > org.apache.hive.service.cli.thrift.ThriftCLIService.executeNewStatement(ThriftCLIService.java:1144) > ~[hive-service-3.1.0.jar:3.1.0] > at > org.apache.hive.service.cli.thrift.ThriftCLIService.ExecuteStatement(ThriftCLIService.java:1280) > ~[hive-service-3.1.0.jar:3.1.0] > at > org.apache.hive.service.rpc.thrift.TCLIService$Processor$ExecuteStatement.getResult(TCLIService.java:1557) > ~[hive-service-rpc-3.1.0.jar:3.1.0] > at > org.apache.hive.service.rpc.thrift.TCLIService$Processor$ExecuteStatement.getResult(TCLIService.java:1542) > ~[hive-service-rpc-3.1.0.jar:3.1.0] > at org.apache.thrift.ProcessFunction.process(ProcessFunction.java:39) > ~[libthrift-0.9.3.jar:0.9.3] > at org.apache.thrift.TBaseProcessor.process(TBaseProcessor.java:39) > ~[libthrift-0.9.3.jar:0.9.3] > at > org.apache.hadoop.hive.metastore.security.HadoopThriftAuthBridge$Server$TUGIAssumingProcessor.process(HadoopThriftAuthBridge.java:648) > ~[hive-standalone-metastore-3.1.0.jar:3.1.0] > at > org.apache.thrift.server.TThreadPoolServer$WorkerProcess.run(TThreadPoolServer.java:286) > ~[libthrift-0.9.3.jar:0.9.3] > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) > ~[?:1.8.0_201] > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) > ~[?:1.8.0_201] > at java.lang.Thread.run(Thread.java:748) [?:1.8.0_201] > Caused by: java.lang.NullPointerException > at > org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genSelectPlan(SemanticAnalyzer.java:4367) > ~[hive-exec-3.1.0.jar:3.1.0] > at > org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genSelectPlan(SemanticAnalyzer.java:4346) > ~[hive-exec-3.1.0.jar:3.1.0] > at > 
org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genPostGroupByBodyPlan(SemanticAnalyzer.java:10576) > ~[hive-exec-3.1.0.jar:3.1.0] > at > org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genBodyPlan(SemanticAnalyzer.java:10515) > ~[hive-exec-3.1.0.jar:3.1.0] > at > org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genPlan(SemanticAnalyzer.java:11434) > ~[hive-exec-3.1.0.jar:3.1.0] > at > org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genPlan(SemanticAnalyzer.java:11291) > ~[hive-exec-3.1.0.jar:3.1.0] > at > org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genPlan(SemanticAnalyzer.java:11318) > ~[hive-exec-3.1.0.jar:3.1.0] > at > org
[jira] [Updated] (HIVE-24186) The aggregate class operation fails when the CBO is false
[ https://issues.apache.org/jira/browse/HIVE-24186?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] GuangMing Lu updated HIVE-24186: Affects Version/s: 3.1.2 > The aggregate class operation fails when the CBO is false > - > > Key: HIVE-24186 > URL: https://issues.apache.org/jira/browse/HIVE-24186 > Project: Hive > Issue Type: Bug > Components: CBO, SQL >Affects Versions: 3.1.0, 3.1.2 >Reporter: GuangMing Lu >Priority: Major > > {code:java} > create table table_1 > ( > idx string, > namex string > ) stored as orc; > create table table_2 > ( > sid string, > sname string > )stored as orc; > set hive.cbo.enable=false; > explain > insert into table table_1(idx , namex) > select t.sid idx, '123' namex > from table_2 t > group by t.sid > order by 1,2; > {code} > Executing the above SQL will report an error, errors as follows: > {code:java} > org.apache.hive.service.cli.HiveSQLException: Error while compiling > statement: FAILED: SemanticException [Error 10004]: Line 4:7 Invalid table > alias or column reference 't': (possible column names are: _col0, _col1) > at > org.apache.hive.service.cli.operation.Operation.toSQLException(Operation.java:341) > ~[hive-service-3.1.0-hw-ei-302001-SNAPSHOT.jar:3.1.0-hw-ei-302001-SNAPSHOT] > at > org.apache.hive.service.cli.operation.SQLOperation.prepare(SQLOperation.java:215) > ~[hive-service-3.1.0-hw-ei-302001-SNAPSHOT.jar:3.1.0-hw-ei-302001-SNAPSHOT] > at > org.apache.hive.service.cli.operation.SQLOperation.runInternal(SQLOperation.java:316) > ~[hive-service-3.1.0-hw-ei-302001-SNAPSHOT.jar:3.1.0-hw-ei-302001-SNAPSHOT] > at > org.apache.hive.service.cli.operation.Operation.run(Operation.java:253) > ~[hive-service-3.1.0-hw-ei-302001-SNAPSHOT.jar:3.1.0-hw-ei-302001-SNAPSHOT] > at > org.apache.hive.service.cli.session.HiveSessionImpl.executeStatementInternal(HiveSessionImpl.java:684) > ~[hive-service-3.1.0-hw-ei-302001-SNAPSHOT.jar:3.1.0-hw-ei-302001-SNAPSHOT] > at > 
org.apache.hive.service.cli.session.HiveSessionImpl.executeStatementAsync(HiveSessionImpl.java:670) > ~[hive-service-3.1.0-hw-ei-302001-SNAPSHOT.jar:3.1.0-hw-ei-302001-SNAPSHOT] > at sun.reflect.GeneratedMethodAccessor151.invoke(Unknown Source) > ~[?:?] > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > ~[?:1.8.0_242] > at java.lang.reflect.Method.invoke(Method.java:498) ~[?:1.8.0_242] > at > org.apache.hive.service.cli.session.HiveSessionProxy.invoke(HiveSessionProxy.java:78) > ~[hive-service-3.1.0-hw-ei-302001-SNAPSHOT.jar:3.1.0-hw-ei-302001-SNAPSHOT] > at > org.apache.hive.service.cli.session.HiveSessionProxy.access$000(HiveSessionProxy.java:36) > ~[hive-service-3.1.0-hw-ei-302001-SNAPSHOT.jar:3.1.0-hw-ei-302001-SNAPSHOT] > at > org.apache.hive.service.cli.session.HiveSessionProxy$1.run(HiveSessionProxy.java:63) > ~[hive-service-3.1.0-hw-ei-302001-SNAPSHOT.jar:3.1.0-hw-ei-302001-SNAPSHOT] > at java.security.AccessController.doPrivileged(Native Method) > ~[?:1.8.0_242] > at javax.security.auth.Subject.doAs(Subject.java:422) ~[?:1.8.0_242] > at > org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1737) > ~[hadoop-common-3.1.1-hw-ei-302001-SNAPSHOT.jar:?] > at > org.apache.hive.service.cli.session.HiveSessionProxy.invoke(HiveSessionProxy.java:59) > ~[hive-service-3.1.0-hw-ei-302001-SNAPSHOT.jar:3.1.0-hw-ei-302001-SNAPSHOT] > at com.sun.proxy.$Proxy66.executeStatementAsync(Unknown Source) ~[?:?] 
> at > org.apache.hive.service.cli.CLIService.executeStatementAsync(CLIService.java:342) > ~[hive-service-3.1.0-hw-ei-302001-SNAPSHOT.jar:3.1.0-hw-ei-302001-SNAPSHOT] > at > org.apache.hive.service.cli.thrift.ThriftCLIService.executeNewStatement(ThriftCLIService.java:1144) > ~[hive-service-3.1.0-hw-ei-302001-SNAPSHOT.jar:3.1.0-hw-ei-302001-SNAPSHOT] > at > org.apache.hive.service.cli.thrift.ThriftCLIService.ExecuteStatement(ThriftCLIService.java:1280) > ~[hive-service-3.1.0-hw-ei-302001-SNAPSHOT.jar:3.1.0-hw-ei-302001-SNAPSHOT] > at > org.apache.hive.service.rpc.thrift.TCLIService$Processor$ExecuteStatement.getResult(TCLIService.java:1557) > > ~[hive-service-rpc-3.1.0-hw-ei-302001-SNAPSHOT.jar:3.1.0-hw-ei-302001-SNAPSHOT] > at > org.apache.hive.service.rpc.thrift.TCLIService$Processor$ExecuteStatement.getResult(TCLIService.java:1542) > > ~[hive-service-rpc-3.1.0-hw-ei-302001-SNAPSHOT.jar:3.1.0-hw-ei-302001-SNAPSHOT] > at org.apache.thrift.ProcessFunction.process(ProcessFunction.java:39) > ~[hive-exec-3.1.0-hw-ei-302001-SNAPSHOT.jar:3.1.0-hw-ei-302001-SNAPSHOT] > at org.apache.thrift.TBaseProcessor.process(TBaseProcesso
[jira] [Updated] (HIVE-24186) The aggregate class operation fails when the CBO is false
[ https://issues.apache.org/jira/browse/HIVE-24186?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] GuangMing Lu updated HIVE-24186: Fix Version/s: (was: 3.1.2) (was: 3.1.0) > The aggregate class operation fails when the CBO is false > - > > Key: HIVE-24186 > URL: https://issues.apache.org/jira/browse/HIVE-24186 > Project: Hive > Issue Type: Bug > Components: CBO, SQL >Affects Versions: 3.1.0 >Reporter: GuangMing Lu >Priority: Major > > {code:java} > create table table_1 > ( > idx string, > namex string > ) stored as orc; > create table table_2 > ( > sid string, > sname string > )stored as orc; > set hive.cbo.enable=false; > explain > insert into table table_1(idx , namex) > select t.sid idx, '123' namex > from table_2 t > group by t.sid > order by 1,2; > {code} > Executing the above SQL will report an error, errors as follows: > {code:java} > org.apache.hive.service.cli.HiveSQLException: Error while compiling > statement: FAILED: SemanticException [Error 10004]: Line 4:7 Invalid table > alias or column reference 't': (possible column names are: _col0, _col1) > at > org.apache.hive.service.cli.operation.Operation.toSQLException(Operation.java:341) > ~[hive-service-3.1.0-hw-ei-302001-SNAPSHOT.jar:3.1.0-hw-ei-302001-SNAPSHOT] > at > org.apache.hive.service.cli.operation.SQLOperation.prepare(SQLOperation.java:215) > ~[hive-service-3.1.0-hw-ei-302001-SNAPSHOT.jar:3.1.0-hw-ei-302001-SNAPSHOT] > at > org.apache.hive.service.cli.operation.SQLOperation.runInternal(SQLOperation.java:316) > ~[hive-service-3.1.0-hw-ei-302001-SNAPSHOT.jar:3.1.0-hw-ei-302001-SNAPSHOT] > at > org.apache.hive.service.cli.operation.Operation.run(Operation.java:253) > ~[hive-service-3.1.0-hw-ei-302001-SNAPSHOT.jar:3.1.0-hw-ei-302001-SNAPSHOT] > at > org.apache.hive.service.cli.session.HiveSessionImpl.executeStatementInternal(HiveSessionImpl.java:684) > ~[hive-service-3.1.0-hw-ei-302001-SNAPSHOT.jar:3.1.0-hw-ei-302001-SNAPSHOT] > at > 
org.apache.hive.service.cli.session.HiveSessionImpl.executeStatementAsync(HiveSessionImpl.java:670) > ~[hive-service-3.1.0-hw-ei-302001-SNAPSHOT.jar:3.1.0-hw-ei-302001-SNAPSHOT] > at sun.reflect.GeneratedMethodAccessor151.invoke(Unknown Source) > ~[?:?] > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > ~[?:1.8.0_242] > at java.lang.reflect.Method.invoke(Method.java:498) ~[?:1.8.0_242] > at > org.apache.hive.service.cli.session.HiveSessionProxy.invoke(HiveSessionProxy.java:78) > ~[hive-service-3.1.0-hw-ei-302001-SNAPSHOT.jar:3.1.0-hw-ei-302001-SNAPSHOT] > at > org.apache.hive.service.cli.session.HiveSessionProxy.access$000(HiveSessionProxy.java:36) > ~[hive-service-3.1.0-hw-ei-302001-SNAPSHOT.jar:3.1.0-hw-ei-302001-SNAPSHOT] > at > org.apache.hive.service.cli.session.HiveSessionProxy$1.run(HiveSessionProxy.java:63) > ~[hive-service-3.1.0-hw-ei-302001-SNAPSHOT.jar:3.1.0-hw-ei-302001-SNAPSHOT] > at java.security.AccessController.doPrivileged(Native Method) > ~[?:1.8.0_242] > at javax.security.auth.Subject.doAs(Subject.java:422) ~[?:1.8.0_242] > at > org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1737) > ~[hadoop-common-3.1.1-hw-ei-302001-SNAPSHOT.jar:?] > at > org.apache.hive.service.cli.session.HiveSessionProxy.invoke(HiveSessionProxy.java:59) > ~[hive-service-3.1.0-hw-ei-302001-SNAPSHOT.jar:3.1.0-hw-ei-302001-SNAPSHOT] > at com.sun.proxy.$Proxy66.executeStatementAsync(Unknown Source) ~[?:?] 
> at > org.apache.hive.service.cli.CLIService.executeStatementAsync(CLIService.java:342) > ~[hive-service-3.1.0-hw-ei-302001-SNAPSHOT.jar:3.1.0-hw-ei-302001-SNAPSHOT] > at > org.apache.hive.service.cli.thrift.ThriftCLIService.executeNewStatement(ThriftCLIService.java:1144) > ~[hive-service-3.1.0-hw-ei-302001-SNAPSHOT.jar:3.1.0-hw-ei-302001-SNAPSHOT] > at > org.apache.hive.service.cli.thrift.ThriftCLIService.ExecuteStatement(ThriftCLIService.java:1280) > ~[hive-service-3.1.0-hw-ei-302001-SNAPSHOT.jar:3.1.0-hw-ei-302001-SNAPSHOT] > at > org.apache.hive.service.rpc.thrift.TCLIService$Processor$ExecuteStatement.getResult(TCLIService.java:1557) > > ~[hive-service-rpc-3.1.0-hw-ei-302001-SNAPSHOT.jar:3.1.0-hw-ei-302001-SNAPSHOT] > at > org.apache.hive.service.rpc.thrift.TCLIService$Processor$ExecuteStatement.getResult(TCLIService.java:1542) > > ~[hive-service-rpc-3.1.0-hw-ei-302001-SNAPSHOT.jar:3.1.0-hw-ei-302001-SNAPSHOT] > at org.apache.thrift.ProcessFunction.process(ProcessFunction.java:39) > ~[hive-exec-3.1.0-hw-ei-302001-SNAPSHOT.jar:3.1.0-hw-ei-302001-SNAPSHOT] > at org.apache.thrift.
[jira] [Assigned] (HIVE-24122) When CBO is enable, CAST(STR as Bigint)IS NOT NULL result is wrong
[ https://issues.apache.org/jira/browse/HIVE-24122?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] GuangMing Lu reassigned HIVE-24122: --- Assignee: GuangMing Lu > When CBO is enable, CAST(STR as Bigint)IS NOT NULL result is wrong > --- > > Key: HIVE-24122 > URL: https://issues.apache.org/jira/browse/HIVE-24122 > Project: Hive > Issue Type: Bug > Components: CBO >Affects Versions: 3.1.0, 3.1.2 >Reporter: GuangMing Lu >Assignee: GuangMing Lu >Priority: Major > Fix For: 4.0.0 > > > {code:java} > create database testdb; > CREATE TABLE IF NOT EXISTS testdb.z_tab > ( > SEARCHWORD STRING, > COUNT_NUM BIGINT, > WORDS STRING > ) > ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' > STORED AS TEXTFILE; > insert into table testdb.z_tab > values('hivetest',111,'aaa'),('hivetest2',111,'bbb'); > set hive.cbo.enable=true; > SELECT CAST(searchword as bigint) IS NOT NULL FROM testdb.z_tab; > SELECT CAST(searchword as bigint) IS NULL FROM testdb.z_tab; > {code} > The SQL results for both queries are the same, as follows: > {noformat} > +---+ > | _c0 | > +---+ > | true | > | true | > +---+{noformat} > SELECT CAST(searchword as bigint) IS NOT NULL FROM testdb.z_tab; execute > result is wrong > -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Resolved] (HIVE-24122) When CBO is enable, CAST(STR as Bigint)IS NOT NULL result is wrong
[ https://issues.apache.org/jira/browse/HIVE-24122?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] GuangMing Lu resolved HIVE-24122. - Fix Version/s: 4.0.0 Resolution: Fixed > When CBO is enable, CAST(STR as Bigint)IS NOT NULL result is wrong > --- > > Key: HIVE-24122 > URL: https://issues.apache.org/jira/browse/HIVE-24122 > Project: Hive > Issue Type: Bug > Components: CBO >Affects Versions: 3.1.0, 3.1.2 >Reporter: GuangMing Lu >Priority: Major > Fix For: 4.0.0 > > > {code:java} > create database testdb; > CREATE TABLE IF NOT EXISTS testdb.z_tab > ( > SEARCHWORD STRING, > COUNT_NUM BIGINT, > WORDS STRING > ) > ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' > STORED AS TEXTFILE; > insert into table testdb.z_tab > values('hivetest',111,'aaa'),('hivetest2',111,'bbb'); > set hive.cbo.enable=true; > SELECT CAST(searchword as bigint) IS NOT NULL FROM testdb.z_tab; > SELECT CAST(searchword as bigint) IS NULL FROM testdb.z_tab; > {code} > The SQL results for both queries are the same, as follows: > {noformat} > +---+ > | _c0 | > +---+ > | true | > | true | > +---+{noformat} > SELECT CAST(searchword as bigint) IS NOT NULL FROM testdb.z_tab; execute > result is wrong > -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Comment Edited] (HIVE-24122) When CBO is enable, CAST(STR as Bigint)IS NOT NULL result is wrong
[ https://issues.apache.org/jira/browse/HIVE-24122?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17199362#comment-17199362 ] GuangMing Lu edited comment on HIVE-24122 at 9/21/20, 12:44 PM: Hey [~zabetak], thanks for the reminder. I tested on master and the result is correct there, because master uses calcite-1.21. After analysis, the problem was fixed in calcite 1.19 or above. was (Author: luguangming): Hey {color:#0066cc} [~zabetak] {color:#172b4d} Thanks for reminding me that I was test in the master is ok, the reason why the master used calcite-1.21. {color}{color} {color:#0066cc}{color:#172b4d}After analysis, the problem was fixed in calcite 1.19 or above{color}{color} > When CBO is enable, CAST(STR as Bigint)IS NOT NULL result is wrong > --- > > Key: HIVE-24122 > URL: https://issues.apache.org/jira/browse/HIVE-24122 > Project: Hive > Issue Type: Bug > Components: CBO >Affects Versions: 3.1.0, 3.1.2 >Reporter: GuangMing Lu >Priority: Major > > {code:java} > create database testdb; > CREATE TABLE IF NOT EXISTS testdb.z_tab > ( > SEARCHWORD STRING, > COUNT_NUM BIGINT, > WORDS STRING > ) > ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' > STORED AS TEXTFILE; > insert into table testdb.z_tab > values('hivetest',111,'aaa'),('hivetest2',111,'bbb'); > set hive.cbo.enable=true; > SELECT CAST(searchword as bigint) IS NOT NULL FROM testdb.z_tab; > SELECT CAST(searchword as bigint) IS NULL FROM testdb.z_tab; > {code} > The SQL results for both queries are the same, as follows: > {noformat} > +---+ > | _c0 | > +---+ > | true | > | true | > +---+{noformat} > SELECT CAST(searchword as bigint) IS NOT NULL FROM testdb.z_tab; execute > result is wrong > -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Commented] (HIVE-24122) When CBO is enable, CAST(STR as Bigint)IS NOT NULL result is wrong
[ https://issues.apache.org/jira/browse/HIVE-24122?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17199362#comment-17199362 ] GuangMing Lu commented on HIVE-24122: - Hey [~zabetak], thanks for the reminder. I tested on master and the result is correct there, because master uses calcite-1.21. After analysis, the problem was fixed in calcite 1.19 or above. > When CBO is enable, CAST(STR as Bigint)IS NOT NULL result is wrong > --- > > Key: HIVE-24122 > URL: https://issues.apache.org/jira/browse/HIVE-24122 > Project: Hive > Issue Type: Bug > Components: CBO >Affects Versions: 3.1.0, 3.1.2 >Reporter: GuangMing Lu >Priority: Major > > {code:java} > create database testdb; > CREATE TABLE IF NOT EXISTS testdb.z_tab > ( > SEARCHWORD STRING, > COUNT_NUM BIGINT, > WORDS STRING > ) > ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' > STORED AS TEXTFILE; > insert into table testdb.z_tab > values('hivetest',111,'aaa'),('hivetest2',111,'bbb'); > set hive.cbo.enable=true; > SELECT CAST(searchword as bigint) IS NOT NULL FROM testdb.z_tab; > SELECT CAST(searchword as bigint) IS NULL FROM testdb.z_tab; > {code} > The SQL results for both queries are the same, as follows: > {noformat} > +---+ > | _c0 | > +---+ > | true | > | true | > +---+{noformat} > SELECT CAST(searchword as bigint) IS NOT NULL FROM testdb.z_tab; execute > result is wrong > -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Updated] (HIVE-24124) NPE occurs when bucket_version different bucket tables are joined
[ https://issues.apache.org/jira/browse/HIVE-24124?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] GuangMing Lu updated HIVE-24124: Description: {code:java} create table z_tab_1( task_id string, data_date string, accno string, curr_type string, ifrs9_pd12_value double, ifrs9_ccf_value double, ifrs9_lgd_value double )partitioned by(pt_dt string) STORED AS ORCFILE TBLPROPERTIES ('bucketing_version'='1'); alter table z_tab_1 add partition(pt_dt = '2020-7-31'); insert into z_tab_1 partition(pt_dt = '2020-7-31') values ('123','2020-7-31','accno-','curr_type-x', 0.1, 0.2 ,0.3), ('1','2020-1-31','a','1-curr_type-a', 0.1, 0.2 ,0.3), ('2','2020-2-31','b','2-curr_type-b', 0.1, 0.2 ,0.3), ('3','2020-3-31','c','3-curr_type-c', 0.1, 0.2 ,0.3), ('4','2020-4-31','d','4-curr_type-d', 0.1, 0.2 ,0.3), ('5','2020-5-31','e','5-curr_type-e', 0.1, 0.2 ,0.3), ('6','2020-6-31','f','6-curr_type-f', 0.1, 0.2 ,0.3), ('7','2020-7-31','g','7-curr_type-g', 0.1, 0.2 ,0.3), ('8','2020-8-31','h','8-curr_type-h', 0.1, 0.2 ,0.3), ('9','2020-9-31','i','9-curr_type-i', 0.1, 0.2 ,0.3); drop table if exists z_tab_2; CREATE TABLE z_tab_2( task_id string, data_date string, accno string, curr_type string, ifrs9_pd12_value double, ifrs9_ccf_value double, ifrs9_lgd_value double ) CLUSTERED BY (TASK_ID, DATA_DATE, ACCNO, CURR_TYPE) SORTED by (TASK_ID, ACCNO, CURR_TYPE) INTO 2000 BUCKETS ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS ORCFILE; set hive.enforce.bucketing=true; INSERT OVERWRITE TABLE z_tab_2 SELECT DCCR.TASK_ID ,DCCR.DATA_DATE ,DCCR.ACCNO ,DCCR.CURR_TYPE ,DCCR.IFRS9_PD12_VALUE ,DCCR.IFRS9_CCF_VALUE ,DCCR.IFRS9_LGD_VALUE FROM z_tab_1 DCCR WHERE pt_dt = '2020-7-31'; {code} {noformat} Caused by: java.lang.NullPointerException at org.apache.hadoop.hive.ql.exec.FileSinkOperator.findWriterOffset(FileSinkOperator.java:1072) at org.apache.hadoop.hive.ql.exec.FileSinkOperator.process(FileSinkOperator.java:988) at 
org.apache.hadoop.hive.ql.exec.Operator.baseForward(Operator.java:995) at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:941) at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:928) at org.apache.hadoop.hive.ql.exec.SelectOperator.process(SelectOperator.java:95) at org.apache.hadoop.hive.ql.exec.mr.ExecReducer.reduce(ExecReducer.java:237) ... 7 more{noformat} was: {code:java} create table z_tab_1( task_id string, data_date string, accno string, curr_type string, ifrs9_pd12_value double, ifrs9_ccf_value double, ifrs9_lgd_value double )partitioned by(pt_dt string) STORED AS ORCFILE TBLPROPERTIES ('bucketing_version'='1');alter table z_tab_1 add partition(pt_dt = '2020-7-31'); insert into z_tab_1 partition(pt_dt = '2020-7-31') values('123','2020-7-31','accno-','curr_type-x', 0.1, 0.2 ,0.3), ('1','2020-1-31','a','1-curr_type-a', 0.1, 0.2 ,0.3), ('2','2020-2-31','b','2-curr_type-b', 0.1, 0.2 ,0.3), ('3','2020-3-31','c','3-curr_type-c', 0.1, 0.2 ,0.3), ('4','2020-4-31','d','4-curr_type-d', 0.1, 0.2 ,0.3), ('5','2020-5-31','e','5-curr_type-e', 0.1, 0.2 ,0.3), ('6','2020-6-31','f','6-curr_type-f', 0.1, 0.2 ,0.3), ('7','2020-7-31','g','7-curr_type-g', 0.1, 0.2 ,0.3), ('8','2020-8-31','h','8-curr_type-h', 0.1, 0.2 ,0.3), ('9','2020-9-31','i','9-curr_type-i', 0.1, 0.2 ,0.3); drop table if exists z_tab_2; CREATE TABLE z_tab_2( task_id string, data_date string, accno string, curr_type string, ifrs9_pd12_value double, ifrs9_ccf_value double, ifrs9_lgd_value double ) CLUSTERED BY (TASK_ID, DATA_DATE, ACCNO, CURR_TYPE) SORTED by (TASK_ID, ACCNO, CURR_TYPE) INTO 2000 BUCKETS ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS ORCFILE; set hive.enforce.bucketing=true; INSERT OVERWRITE TABLE z_tab_2 SELECT DCCR.TASK_ID ,DCCR.DATA_DATE ,DCCR.ACCNO ,DCCR.CURR_TYPE ,DCCR.IFRS9_PD12_VALUE ,DCCR.IFRS9_CCF_VALUE ,DCCR.IFRS9_LGD_VALUE FROM z_tab_1 DCCR WHERE pt_dt = '2020-7-31'; {code} {noformat} Caused by: java.lang.NullPointerException at 
org.apache.hadoop.hive.ql.exec.FileSinkOperator.findWriterOffset(FileSinkOperator.java:1072) at org.apache.hadoop.hive.ql.exec.FileSinkOperator.process(FileSinkOperator.java:988) at org.apache.hadoop.hive.ql.exec.Operator.baseForward(Operator.java:995) at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:941) at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:928) at org.apache.hadoop.hive.ql.exec.SelectOperator.process(SelectOperator.java:95) at org.apache.hadoop.hive.ql.exec.mr.ExecReduce
[jira] [Updated] (HIVE-24124) NPE occurs when bucket_version different bucket tables are joined
[ https://issues.apache.org/jira/browse/HIVE-24124?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] GuangMing Lu updated HIVE-24124: Description: {code:java} create table z_tab_1( task_id string, data_date string, accno string, curr_type string, ifrs9_pd12_value double, ifrs9_ccf_value double, ifrs9_lgd_value double )partitioned by(pt_dt string) STORED AS ORCFILE TBLPROPERTIES ('bucketing_version'='1');alter table z_tab_1 add partition(pt_dt = '2020-7-31'); insert into z_tab_1 partition(pt_dt = '2020-7-31') values('123','2020-7-31','accno-','curr_type-x', 0.1, 0.2 ,0.3), ('1','2020-1-31','a','1-curr_type-a', 0.1, 0.2 ,0.3), ('2','2020-2-31','b','2-curr_type-b', 0.1, 0.2 ,0.3), ('3','2020-3-31','c','3-curr_type-c', 0.1, 0.2 ,0.3), ('4','2020-4-31','d','4-curr_type-d', 0.1, 0.2 ,0.3), ('5','2020-5-31','e','5-curr_type-e', 0.1, 0.2 ,0.3), ('6','2020-6-31','f','6-curr_type-f', 0.1, 0.2 ,0.3), ('7','2020-7-31','g','7-curr_type-g', 0.1, 0.2 ,0.3), ('8','2020-8-31','h','8-curr_type-h', 0.1, 0.2 ,0.3), ('9','2020-9-31','i','9-curr_type-i', 0.1, 0.2 ,0.3); drop table if exists z_tab_2; CREATE TABLE z_tab_2( task_id string, data_date string, accno string, curr_type string, ifrs9_pd12_value double, ifrs9_ccf_value double, ifrs9_lgd_value double ) CLUSTERED BY (TASK_ID, DATA_DATE, ACCNO, CURR_TYPE) SORTED by (TASK_ID, ACCNO, CURR_TYPE) INTO 2000 BUCKETS ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS ORCFILE; set hive.enforce.bucketing=true; INSERT OVERWRITE TABLE z_tab_2 SELECT DCCR.TASK_ID ,DCCR.DATA_DATE ,DCCR.ACCNO ,DCCR.CURR_TYPE ,DCCR.IFRS9_PD12_VALUE ,DCCR.IFRS9_CCF_VALUE ,DCCR.IFRS9_LGD_VALUE FROM z_tab_1 DCCR WHERE pt_dt = '2020-7-31'; {code} {noformat} Caused by: java.lang.NullPointerException at org.apache.hadoop.hive.ql.exec.FileSinkOperator.findWriterOffset(FileSinkOperator.java:1072) at org.apache.hadoop.hive.ql.exec.FileSinkOperator.process(FileSinkOperator.java:988) at 
org.apache.hadoop.hive.ql.exec.Operator.baseForward(Operator.java:995) at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:941) at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:928) at org.apache.hadoop.hive.ql.exec.SelectOperator.process(SelectOperator.java:95) at org.apache.hadoop.hive.ql.exec.mr.ExecReducer.reduce(ExecReducer.java:237) ... 7 more{noformat} was: {code:java} create table z_tab_1( task_id string, data_date string, accno string, curr_type string, ifrs9_pd12_value double, ifrs9_ccf_value double, ifrs9_lgd_value double )partitioned by(pt_dt string) STORED AS ORCFILE TBLPROPERTIES ('bucketing_version'='1');alter table z_tab_1 add partition(pt_dt = '2020-7-31'); insert into z_tab_1 partition(pt_dt = '2020-7-31') values('123','2020-7-31','accno-','curr_type-x', 0.1, 0.2 ,0.3), ('1','2020-1-31','a','1-curr_type-a', 0.1, 0.2 ,0.3), ('2','2020-2-31','b','2-curr_type-b', 0.1, 0.2 ,0.3), ('3','2020-3-31','c','3-curr_type-c', 0.1, 0.2 ,0.3), ('4','2020-4-31','d','4-curr_type-d', 0.1, 0.2 ,0.3), ('5','2020-5-31','e','5-curr_type-e', 0.1, 0.2 ,0.3), ('6','2020-6-31','f','6-curr_type-f', 0.1, 0.2 ,0.3), ('7','2020-7-31','g','7-curr_type-g', 0.1, 0.2 ,0.3), ('8','2020-8-31','h','8-curr_type-h', 0.1, 0.2 ,0.3), ('9','2020-9-31','i','9-curr_type-i', 0.1, 0.2 ,0.3); drop table if exists z_tab_2; CREATE TABLE z_tab_2( task_id string, data_date string, accno string, curr_type string, ifrs9_pd12_value double, ifrs9_ccf_value double, ifrs9_lgd_value double ) CLUSTERED BY (TASK_ID, DATA_DATE, ACCNO, CURR_TYPE) SORTED by (TASK_ID, ACCNO, CURR_TYPE) INTO 2000 BUCKETS ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS ORCFILE; set hive.enforce.bucketing=true; INSERT OVERWRITE TABLE z_tab_2 SELECT DCCR.TASK_ID ,DCCR.DATA_DATE ,DCCR.ACCNO ,DCCR.CURR_TYPE ,DCCR.IFRS9_PD12_VALUE ,DCCR.IFRS9_CCF_VALUE ,DCCR.IFRS9_LGD_VALUE FROM z_tab_1 DCCR WHERE pt_dt = '2020-7-31'; {code} {noformat} Caused by: java.lang.NullPointerException at 
org.apache.hadoop.hive.ql.exec.FileSinkOperator.findWriterOffset(FileSinkOperator.java:1072) at org.apache.hadoop.hive.ql.exec.FileSinkOperator.process(FileSinkOperator.java:988) at org.apache.hadoop.hive.ql.exec.Operator.baseForward(Operator.java:995) at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:941) at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:928) at org.apache.hadoop.hive.ql.exec.SelectOperator.process(SelectOperator.java:95) at org.apache.hadoop.hive.ql.exec.mr.ExecReducer.redu
[jira] [Updated] (HIVE-24122) When CBO is enable, CAST(STR as Bigint)IS NOT NULL result is wrong
[ https://issues.apache.org/jira/browse/HIVE-24122?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] GuangMing Lu updated HIVE-24122: Description: {code:java} create database testdb; CREATE TABLE IF NOT EXISTS testdb.z_tab ( SEARCHWORD STRING, COUNT_NUM BIGINT, WORDS STRING ) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS TEXTFILE; insert into table testdb.z_tab values('hivetest',111,'aaa'),('hivetest2',111,'bbb'); set hive.cbo.enable=true; SELECT CAST(searchword as bigint) IS NOT NULL FROM testdb.z_tab; SELECT CAST(searchword as bigint) IS NULL FROM testdb.z_tab; {code} The SQL results for both queries are the same, as follows: {noformat} +---+ | _c0 | +---+ | true | | true | +---+{noformat} SELECT CAST(searchword as bigint) IS NOT NULL FROM testdb.z_tab; execute result is wrong was: {code:java} create database testdb; CREATE TABLE IF NOT EXISTS testdb.z_tab ( SEARCHWORD STRING, COUNT_NUM BIGINT, WORDS STRING ) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS TEXTFILE; insert into table testdb.z_tab values('hivetest',111,'aaa'),('hivetest2',111,'bbb'); set hive.cbo.enable=true; SELECT CAST(searchword as bigint) IS NOT NULL FROM testdb.z_tab; SELECT CAST(searchword as bigint) IS NULL FROM testdb.z_tab; {code} The SQL results for both queries are the same, as follows: +---+ | _c0 | +---+ | true | | true | +---+ SELECT CAST(searchword as bigint) IS NOT NULL FROM testdb.z_tab; execute result is wrong > When CBO is enable, CAST(STR as Bigint)IS NOT NULL result is wrong > --- > > Key: HIVE-24122 > URL: https://issues.apache.org/jira/browse/HIVE-24122 > Project: Hive > Issue Type: Bug > Components: CBO >Affects Versions: 3.1.0, 3.1.2 >Reporter: GuangMing Lu >Priority: Major > > {code:java} > create database testdb; > CREATE TABLE IF NOT EXISTS testdb.z_tab > ( > SEARCHWORD STRING, > COUNT_NUM BIGINT, > WORDS STRING > ) > ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' > STORED AS TEXTFILE; > insert into table testdb.z_tab > 
values('hivetest',111,'aaa'),('hivetest2',111,'bbb'); > set hive.cbo.enable=true; > SELECT CAST(searchword as bigint) IS NOT NULL FROM testdb.z_tab; > SELECT CAST(searchword as bigint) IS NULL FROM testdb.z_tab; > {code} > The SQL results for both queries are the same, as follows: > {noformat} > +---+ > | _c0 | > +---+ > | true | > | true | > +---+{noformat} > SELECT CAST(searchword as bigint) IS NOT NULL FROM testdb.z_tab; execute > result is wrong > -- This message was sent by Atlassian Jira (v8.3.4#803005)
[jira] [Updated] (HIVE-24100) Syntax compile failure occurs when INSERT table column Order by is greater than 2 columns when CBO is false
[ https://issues.apache.org/jira/browse/HIVE-24100?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] GuangMing Lu updated HIVE-24100: Description: Executing the following SQL will fail to compile {code:java} set hive.cbo.enable=false; -- create tabls -- create table table_1 ( item_id string, stru_area_id string )partitioned by ( PT_DT string) stored as orc; create table table_2 ( CREATE_ORG_ID string, PROMOTION_ID string, PROMOTION_STATUS string ) partitioned by (pt_dt string) stored as orc; create table table_3 ( STRU_ID string, SUP_STRU string ) partitioned by(pt_dt string) stored as orc; set hive.cbo.enable=false; -- execute sql-- explain insert into table table_1 partition(PT_DT = '2020-08-22') (item_id , stru_area_id) select '123' ITEM_ID , T.STRU_ID STRU_AREA_ID from ( select T0.STRU_ID STRU_ID ,T0.STRU_ID STRU_ID_BRANCH from table_3 T0 ) T inner join ( select TT.CREATE_ORG_ID from table_2 TT ) TIV on (T.STRU_ID_BRANCH = TIV.CREATE_ORG_ID) group by T.STRU_ID order by 1,2; {code} {code:java} org.apache.hive.service.cli.HiveSQLException: Error while compiling statement: FAILED: SemanticException [Error 10004]: Line 5:28 Invalid table alias or column reference 'T': (possible column names are: _col0, _col1) at org.apache.hive.service.cli.operation.Operation.toSQLException(Operation.java:341) ~[hive-service-3.1.0.jar:3.1.0] at org.apache.hive.service.cli.operation.SQLOperation.prepare(SQLOperation.java:215) ~[hive-service-3.1.0.jar:3.1.0] at org.apache.hive.service.cli.operation.SQLOperation.runInternal(SQLOperation.java:316) ~[hive-service-3.1.0.jar:3.1.0] at org.apache.hive.service.cli.operation.Operation.run(Operation.java:253) ~[hive-service-3.1.0.jar:3.1.0] at org.apache.hive.service.cli.session.HiveSessionImpl.executeStatementInternal(HiveSessionImpl.java:684) ~[hive-service-3.1.0.jar:3.1.0] at org.apache.hive.service.cli.session.HiveSessionImpl.executeStatementAsync(HiveSessionImpl.java:670) ~[hive-service-3.1.0.jar:3.1.0] at 
org.apache.hive.service.cli.CLIService.executeStatementAsync(CLIService.java:342) ~[hive-service-3.1.0.jar:3.1.0] at org.apache.hive.service.cli.thrift.ThriftCLIService.executeNewStatement(ThriftCLIService.java:1144) ~[hive-service-3.1.0.jar:3.1.0] at org.apache.hive.service.cli.thrift.ThriftCLIService.ExecuteStatement(ThriftCLIService.java:1280) ~[hive-service-3.1.0.jar:3.1.0] at org.apache.hive.service.rpc.thrift.TCLIService$Processor$ExecuteStatement.getResult(TCLIService.java:1557) ~[hive-exec-3.1.0.jar:3.1.0] at org.apache.hive.service.rpc.thrift.TCLIService$Processor$ExecuteStatement.getResult(TCLIService.java:1542) ~[hive-exec-3.1.0.jar:3.1.0] at org.apache.thrift.ProcessFunction.process(ProcessFunction.java:39) ~[hive-exec-3.1.0.jar:3.1.0] at org.apache.thrift.TBaseProcessor.process(TBaseProcessor.java:39) ~[hive-exec-3.1.0.jar:3.1.0] at org.apache.hadoop.hive.metastore.security.HadoopThriftAuthBridge$Server$TUGIAssumingProcessor.process(HadoopThriftAuthBridge.java:648) ~[hive-exec-3.1.0.jar:3.1.0] at org.apache.thrift.server.TThreadPoolServer$WorkerProcess.run(TThreadPoolServer.java:286) ~[hive-exec-3.1.0.jar:3.1.0] at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) ~[?:1.8.0_201] at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) ~[?:1.8.0_201] at java.lang.Thread.run(Thread.java:748) [?:1.8.0_201] Caused by: org.apache.hadoop.hive.ql.parse.SemanticException: Line 5:28 Invalid table alias or column reference 'T': (possible column names are: _col0, _col1) at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genAllExprNodeDesc(SemanticAnalyzer.java:12689) ~[hive-exec-3.1.0.jar:3.1.0] at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genExprNodeDesc(SemanticAnalyzer.java:12629) ~[hive-exec-3.1.0.jar:3.1.0] at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genExprNodeDesc(SemanticAnalyzer.java:12597) ~[hive-exec-3.1.0.jar:3.1.0] at 
org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genExprNodeDesc(SemanticAnalyzer.java:12575) ~[hive-exec-3.1.0.jar:3.1.0] at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genReduceSinkPlan(SemanticAnalyzer.java:8482) ~[hive-exec-3.1.0.jar:3.1.0] at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genPostGroupByBodyPlan(SemanticAnalyzer.java:10616) ~[hive-exec-3.1.0.jar:3.1.0] at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genBodyPlan(SemanticAnalyzer.java:10515) ~[hive-exec-3.1.0.jar:3.1.0] at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genPlan(SemanticAnalyzer.java:11434) ~[hive-exec-3.1.0.jar:3.1.0] at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genPlan(SemanticAnalyzer.java:11304) ~[hive-exec-3.1.0.jar:3.1.0] at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.genOPTree(SemanticAnalyzer.java:12090) ~[hive-exec-3.1.0.jar:3.1.0] at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.an