[jira] [Commented] (HIVE-21132) Semi join edge is not being removed despite max bloomfilter entries set to 1

2019-01-28 Thread Vineet Garg (JIRA)


[ 
https://issues.apache.org/jira/browse/HIVE-21132?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16754259#comment-16754259
 ] 

Vineet Garg commented on HIVE-21132:


Pushed to master, thanks [~jdere]

> Semi join edge is not being removed despite max bloomfilter entries set to 1
> 
>
> Key: HIVE-21132
> URL: https://issues.apache.org/jira/browse/HIVE-21132
> Project: Hive
> Issue Type: Bug
> Components: Query Planning
> Affects Versions: 4.0.0
> Reporter: Vineet Garg
> Assignee: Vineet Garg
> Priority: Major
> Attachments: HIVE-21132.1.patch, HIVE-21132.2.patch, 
> HIVE-21132.3.patch
>
>
> * Reproducer
> {code:sql}
> --! qt:dataset:lineitem
> --! qt:dataset:part
> --! qt:dataset:src
> set hive.support.concurrency=true;
> set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
> --set hive.compute.query.using.stats=false;
> set hive.mapred.mode=nonstrict;
> set hive.explain.user=false;
> set hive.optimize.ppd=true;
> set hive.ppd.remove.duplicatefilters=true;
> set hive.tez.dynamic.partition.pruning=true;
> set hive.tez.dynamic.semijoin.reduction=true;
> set hive.optimize.metadataonly=false;
> set hive.optimize.index.filter=true;
> set hive.stats.autogather=true;
> set hive.tez.bigtable.minsize.semijoin.reduction=1;
> set hive.tez.min.bloom.filter.entries=1;
> set hive.stats.fetch.column.stats=true;
> set hive.tez.bloom.filter.factor=1.0f;
> set hive.auto.convert.join=false;
> set hive.optimize.shared.work=false;
> create database tpch_test;
> use tpch_test;
> CREATE TABLE `customer`(
>   `c_custkey` bigint, 
>   `c_name` string, 
>   `c_address` string, 
>   `c_nationkey` bigint, 
>   `c_phone` string, 
>   `c_acctbal` double, 
>   `c_mktsegment` string, 
>   `c_comment` string)
> ROW FORMAT SERDE 
>   'org.apache.hadoop.hive.ql.io.orc.OrcSerde' 
> STORED AS INPUTFORMAT 
>   'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' 
> OUTPUTFORMAT 
>   'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'
> TBLPROPERTIES (
>   'bucketing_version'='2', 
>   'transactional'='true', 
>   'transactional_properties'='default', 
>   'transient_lastDdlTime'='1543026723');
> CREATE TABLE `lineitem`(
>   `l_orderkey` bigint, 
>   `l_partkey` bigint, 
>   `l_suppkey` bigint, 
>   `l_linenumber` int, 
>   `l_quantity` double, 
>   `l_extendedprice` double, 
>   `l_discount` double, 
>   `l_tax` double, 
>   `l_returnflag` string, 
>   `l_linestatus` string, 
>   `l_shipdate` string, 
>   `l_commitdate` string, 
>   `l_receiptdate` string, 
>   `l_shipinstruct` string, 
>   `l_shipmode` string, 
>   `l_comment` string)
> ROW FORMAT SERDE 
>   'org.apache.hadoop.hive.ql.io.orc.OrcSerde' 
> STORED AS INPUTFORMAT 
>   'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' 
> OUTPUTFORMAT 
>   'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'
> TBLPROPERTIES (
>   'bucketing_version'='2', 
>   'transactional'='true', 
>   'transactional_properties'='default', 
>   'transient_lastDdlTime'='1543027179');
> CREATE TABLE `orders`(
>   `o_orderkey` bigint, 
>   `o_custkey` bigint, 
>   `o_orderstatus` string, 
>   `o_totalprice` double, 
>   `o_orderdate` string, 
>   `o_orderpriority` string, 
>   `o_clerk` string, 
>   `o_shippriority` int, 
>   `o_comment` string)
> ROW FORMAT SERDE 
>   'org.apache.hadoop.hive.ql.io.orc.OrcSerde' 
> STORED AS INPUTFORMAT 
>   'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' 
> OUTPUTFORMAT 
>   'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'
> TBLPROPERTIES (
>   'bucketing_version'='2', 
>   'transactional'='true', 
>   'transactional_properties'='default', 
>   'transient_lastDdlTime'='1543026824');
> alter table customer update statistics 
> set('numRows'='15000','rawDataSize'='8633707142');
> alter table lineitem update statistics 
> set('numRows'='589709','rawDataSize'='184245066955');
> alter table orders update statistics 
> set('numRows'='15','rawDataSize'='46741318253');
> create view q18_tmp_cached as
> select l_orderkey, sum(l_quantity) as t_sum_quantity
> from lineitem
> where l_orderkey is not null
> group by l_orderkey;
> -- Set bloom filter size to huge number so we get any possible semijoin reductions
> set hive.tez.min.bloom.filter.entries=0;
> set hive.tez.max.bloom.filter.entries=1;
> create table q18_large_volume_customer_cached stored as orc tblproperties 
> ('transactional'='true', 'transactional_properties'='default') as
> select c_name, c_custkey, o_orderkey, o_orderdate, o_totalprice, 
> sum(l_quantity)
> from customer, orders, q18_tmp_cached t, lineitem l
> where
>   c_custkey = o_custkey and o_orderkey = t.l_orderkey
>   and o_orderkey is not null and t.t_sum_quantity > 300
>   and o_orderkey = l.l_orderkey and l.l_orderkey is not null
> group by c_name, c_custkey, o_orderkey, o_orderdate, 

[jira] [Commented] (HIVE-21132) Semi join edge is not being removed despite max bloomfilter entries set to 1

2019-01-28 Thread Jason Dere (JIRA)


[ 
https://issues.apache.org/jira/browse/HIVE-21132?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16754239#comment-16754239
 ] 

Jason Dere commented on HIVE-21132:
---

+1

> Semi join edge is not being removed despite max bloomfilter entries set to 1
> 
>
> Key: HIVE-21132
> URL: https://issues.apache.org/jira/browse/HIVE-21132
> Project: Hive
> Issue Type: Bug
> Components: Query Planning
> Affects Versions: 4.0.0
> Reporter: Vineet Garg
> Assignee: Vineet Garg
> Priority: Major
> Attachments: HIVE-21132.1.patch, HIVE-21132.2.patch, 
> HIVE-21132.3.patch
>
>
> * Reproducer
> {code:sql}
> --! qt:dataset:lineitem
> --! qt:dataset:part
> --! qt:dataset:src
> set hive.support.concurrency=true;
> set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
> --set hive.compute.query.using.stats=false;
> set hive.mapred.mode=nonstrict;
> set hive.explain.user=false;
> set hive.optimize.ppd=true;
> set hive.ppd.remove.duplicatefilters=true;
> set hive.tez.dynamic.partition.pruning=true;
> set hive.tez.dynamic.semijoin.reduction=true;
> set hive.optimize.metadataonly=false;
> set hive.optimize.index.filter=true;
> set hive.stats.autogather=true;
> set hive.tez.bigtable.minsize.semijoin.reduction=1;
> set hive.tez.min.bloom.filter.entries=1;
> set hive.stats.fetch.column.stats=true;
> set hive.tez.bloom.filter.factor=1.0f;
> set hive.auto.convert.join=false;
> set hive.optimize.shared.work=false;
> create database tpch_test;
> use tpch_test;
> CREATE TABLE `customer`(
>   `c_custkey` bigint, 
>   `c_name` string, 
>   `c_address` string, 
>   `c_nationkey` bigint, 
>   `c_phone` string, 
>   `c_acctbal` double, 
>   `c_mktsegment` string, 
>   `c_comment` string)
> ROW FORMAT SERDE 
>   'org.apache.hadoop.hive.ql.io.orc.OrcSerde' 
> STORED AS INPUTFORMAT 
>   'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' 
> OUTPUTFORMAT 
>   'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'
> TBLPROPERTIES (
>   'bucketing_version'='2', 
>   'transactional'='true', 
>   'transactional_properties'='default', 
>   'transient_lastDdlTime'='1543026723');
> CREATE TABLE `lineitem`(
>   `l_orderkey` bigint, 
>   `l_partkey` bigint, 
>   `l_suppkey` bigint, 
>   `l_linenumber` int, 
>   `l_quantity` double, 
>   `l_extendedprice` double, 
>   `l_discount` double, 
>   `l_tax` double, 
>   `l_returnflag` string, 
>   `l_linestatus` string, 
>   `l_shipdate` string, 
>   `l_commitdate` string, 
>   `l_receiptdate` string, 
>   `l_shipinstruct` string, 
>   `l_shipmode` string, 
>   `l_comment` string)
> ROW FORMAT SERDE 
>   'org.apache.hadoop.hive.ql.io.orc.OrcSerde' 
> STORED AS INPUTFORMAT 
>   'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' 
> OUTPUTFORMAT 
>   'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'
> TBLPROPERTIES (
>   'bucketing_version'='2', 
>   'transactional'='true', 
>   'transactional_properties'='default', 
>   'transient_lastDdlTime'='1543027179');
> CREATE TABLE `orders`(
>   `o_orderkey` bigint, 
>   `o_custkey` bigint, 
>   `o_orderstatus` string, 
>   `o_totalprice` double, 
>   `o_orderdate` string, 
>   `o_orderpriority` string, 
>   `o_clerk` string, 
>   `o_shippriority` int, 
>   `o_comment` string)
> ROW FORMAT SERDE 
>   'org.apache.hadoop.hive.ql.io.orc.OrcSerde' 
> STORED AS INPUTFORMAT 
>   'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' 
> OUTPUTFORMAT 
>   'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'
> TBLPROPERTIES (
>   'bucketing_version'='2', 
>   'transactional'='true', 
>   'transactional_properties'='default', 
>   'transient_lastDdlTime'='1543026824');
> alter table customer update statistics 
> set('numRows'='15000','rawDataSize'='8633707142');
> alter table lineitem update statistics 
> set('numRows'='589709','rawDataSize'='184245066955');
> alter table orders update statistics 
> set('numRows'='15','rawDataSize'='46741318253');
> create view q18_tmp_cached as
> select l_orderkey, sum(l_quantity) as t_sum_quantity
> from lineitem
> where l_orderkey is not null
> group by l_orderkey;
> -- Set bloom filter size to huge number so we get any possible semijoin reductions
> set hive.tez.min.bloom.filter.entries=0;
> set hive.tez.max.bloom.filter.entries=1;
> create table q18_large_volume_customer_cached stored as orc tblproperties 
> ('transactional'='true', 'transactional_properties'='default') as
> select c_name, c_custkey, o_orderkey, o_orderdate, o_totalprice, 
> sum(l_quantity)
> from customer, orders, q18_tmp_cached t, lineitem l
> where
>   c_custkey = o_custkey and o_orderkey = t.l_orderkey
>   and o_orderkey is not null and t.t_sum_quantity > 300
>   and o_orderkey = l.l_orderkey and l.l_orderkey is not null
> group by c_name, c_custkey, o_orderkey, o_orderdate, o_totalprice
> order by o_totalprice 

[jira] [Commented] (HIVE-21132) Semi join edge is not being removed despite max bloomfilter entries set to 1

2019-01-25 Thread Vineet Garg (JIRA)


[ 
https://issues.apache.org/jira/browse/HIVE-21132?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16752804#comment-16752804
 ] 

Vineet Garg commented on HIVE-21132:


[~jcamachorodriguez] [~jdere] Can you take a look at it? The change is in 
reduce dedup logic to skip semi join branches.

> Semi join edge is not being removed despite max bloomfilter entries set to 1
> 
>
> Key: HIVE-21132
> URL: https://issues.apache.org/jira/browse/HIVE-21132
> Project: Hive
> Issue Type: Bug
> Components: Query Planning
> Affects Versions: 4.0.0
> Reporter: Vineet Garg
> Assignee: Vineet Garg
> Priority: Major
> Attachments: HIVE-21132.1.patch, HIVE-21132.2.patch, 
> HIVE-21132.3.patch
>
>
> * Reproducer
> {code:sql}
> --! qt:dataset:lineitem
> --! qt:dataset:part
> --! qt:dataset:src
> set hive.support.concurrency=true;
> set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
> --set hive.compute.query.using.stats=false;
> set hive.mapred.mode=nonstrict;
> set hive.explain.user=false;
> set hive.optimize.ppd=true;
> set hive.ppd.remove.duplicatefilters=true;
> set hive.tez.dynamic.partition.pruning=true;
> set hive.tez.dynamic.semijoin.reduction=true;
> set hive.optimize.metadataonly=false;
> set hive.optimize.index.filter=true;
> set hive.stats.autogather=true;
> set hive.tez.bigtable.minsize.semijoin.reduction=1;
> set hive.tez.min.bloom.filter.entries=1;
> set hive.stats.fetch.column.stats=true;
> set hive.tez.bloom.filter.factor=1.0f;
> set hive.auto.convert.join=false;
> set hive.optimize.shared.work=false;
> create database tpch_test;
> use tpch_test;
> CREATE TABLE `customer`(
>   `c_custkey` bigint, 
>   `c_name` string, 
>   `c_address` string, 
>   `c_nationkey` bigint, 
>   `c_phone` string, 
>   `c_acctbal` double, 
>   `c_mktsegment` string, 
>   `c_comment` string)
> ROW FORMAT SERDE 
>   'org.apache.hadoop.hive.ql.io.orc.OrcSerde' 
> STORED AS INPUTFORMAT 
>   'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' 
> OUTPUTFORMAT 
>   'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'
> TBLPROPERTIES (
>   'bucketing_version'='2', 
>   'transactional'='true', 
>   'transactional_properties'='default', 
>   'transient_lastDdlTime'='1543026723');
> CREATE TABLE `lineitem`(
>   `l_orderkey` bigint, 
>   `l_partkey` bigint, 
>   `l_suppkey` bigint, 
>   `l_linenumber` int, 
>   `l_quantity` double, 
>   `l_extendedprice` double, 
>   `l_discount` double, 
>   `l_tax` double, 
>   `l_returnflag` string, 
>   `l_linestatus` string, 
>   `l_shipdate` string, 
>   `l_commitdate` string, 
>   `l_receiptdate` string, 
>   `l_shipinstruct` string, 
>   `l_shipmode` string, 
>   `l_comment` string)
> ROW FORMAT SERDE 
>   'org.apache.hadoop.hive.ql.io.orc.OrcSerde' 
> STORED AS INPUTFORMAT 
>   'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' 
> OUTPUTFORMAT 
>   'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'
> TBLPROPERTIES (
>   'bucketing_version'='2', 
>   'transactional'='true', 
>   'transactional_properties'='default', 
>   'transient_lastDdlTime'='1543027179');
> CREATE TABLE `orders`(
>   `o_orderkey` bigint, 
>   `o_custkey` bigint, 
>   `o_orderstatus` string, 
>   `o_totalprice` double, 
>   `o_orderdate` string, 
>   `o_orderpriority` string, 
>   `o_clerk` string, 
>   `o_shippriority` int, 
>   `o_comment` string)
> ROW FORMAT SERDE 
>   'org.apache.hadoop.hive.ql.io.orc.OrcSerde' 
> STORED AS INPUTFORMAT 
>   'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' 
> OUTPUTFORMAT 
>   'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'
> TBLPROPERTIES (
>   'bucketing_version'='2', 
>   'transactional'='true', 
>   'transactional_properties'='default', 
>   'transient_lastDdlTime'='1543026824');
> alter table customer update statistics 
> set('numRows'='15000','rawDataSize'='8633707142');
> alter table lineitem update statistics 
> set('numRows'='589709','rawDataSize'='184245066955');
> alter table orders update statistics 
> set('numRows'='15','rawDataSize'='46741318253');
> create view q18_tmp_cached as
> select l_orderkey, sum(l_quantity) as t_sum_quantity
> from lineitem
> where l_orderkey is not null
> group by l_orderkey;
> -- Set bloom filter size to huge number so we get any possible semijoin reductions
> set hive.tez.min.bloom.filter.entries=0;
> set hive.tez.max.bloom.filter.entries=1;
> create table q18_large_volume_customer_cached stored as orc tblproperties 
> ('transactional'='true', 'transactional_properties'='default') as
> select c_name, c_custkey, o_orderkey, o_orderdate, o_totalprice, 
> sum(l_quantity)
> from customer, orders, q18_tmp_cached t, lineitem l
> where
>   c_custkey = o_custkey and o_orderkey = t.l_orderkey
>   and o_orderkey is not null and t.t_sum_quantity > 300
>   and o_orderkey = l.l_orderkey 

[jira] [Commented] (HIVE-21132) Semi join edge is not being removed despite max bloomfilter entries set to 1

2019-01-24 Thread Hive QA (JIRA)


[ 
https://issues.apache.org/jira/browse/HIVE-21132?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16751699#comment-16751699
 ] 

Hive QA commented on HIVE-21132:




Here are the results of testing the latest attachment:
https://issues.apache.org/jira/secure/attachment/12956191/HIVE-21132.3.patch

{color:green}SUCCESS:{color} +1 due to 1 test(s) being added or modified.

{color:green}SUCCESS:{color} +1 due to 15710 tests passed

Test results: 
https://builds.apache.org/job/PreCommit-HIVE-Build/15784/testReport
Console output: https://builds.apache.org/job/PreCommit-HIVE-Build/15784/console
Test logs: http://104.198.109.242/logs/PreCommit-HIVE-Build-15784/

Messages:
{noformat}
Executing org.apache.hive.ptest.execution.TestCheckPhase
Executing org.apache.hive.ptest.execution.PrepPhase
Executing org.apache.hive.ptest.execution.YetusPhase
Executing org.apache.hive.ptest.execution.ExecutionPhase
Executing org.apache.hive.ptest.execution.ReportingPhase
{noformat}

This message is automatically generated.

ATTACHMENT ID: 12956191 - PreCommit-HIVE-Build

> Semi join edge is not being removed despite max bloomfilter entries set to 1
> 
>
> Key: HIVE-21132
> URL: https://issues.apache.org/jira/browse/HIVE-21132
> Project: Hive
> Issue Type: Bug
> Components: Query Planning
> Affects Versions: 4.0.0
> Reporter: Vineet Garg
> Assignee: Vineet Garg
> Priority: Major
> Attachments: HIVE-21132.1.patch, HIVE-21132.2.patch, 
> HIVE-21132.3.patch
>
>
> * Reproducer
> {code:sql}
> --! qt:dataset:lineitem
> --! qt:dataset:part
> --! qt:dataset:src
> set hive.support.concurrency=true;
> set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
> --set hive.compute.query.using.stats=false;
> set hive.mapred.mode=nonstrict;
> set hive.explain.user=false;
> set hive.optimize.ppd=true;
> set hive.ppd.remove.duplicatefilters=true;
> set hive.tez.dynamic.partition.pruning=true;
> set hive.tez.dynamic.semijoin.reduction=true;
> set hive.optimize.metadataonly=false;
> set hive.optimize.index.filter=true;
> set hive.stats.autogather=true;
> set hive.tez.bigtable.minsize.semijoin.reduction=1;
> set hive.tez.min.bloom.filter.entries=1;
> set hive.stats.fetch.column.stats=true;
> set hive.tez.bloom.filter.factor=1.0f;
> set hive.auto.convert.join=false;
> set hive.optimize.shared.work=false;
> create database tpch_test;
> use tpch_test;
> CREATE TABLE `customer`(
>   `c_custkey` bigint, 
>   `c_name` string, 
>   `c_address` string, 
>   `c_nationkey` bigint, 
>   `c_phone` string, 
>   `c_acctbal` double, 
>   `c_mktsegment` string, 
>   `c_comment` string)
> ROW FORMAT SERDE 
>   'org.apache.hadoop.hive.ql.io.orc.OrcSerde' 
> STORED AS INPUTFORMAT 
>   'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' 
> OUTPUTFORMAT 
>   'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'
> TBLPROPERTIES (
>   'bucketing_version'='2', 
>   'transactional'='true', 
>   'transactional_properties'='default', 
>   'transient_lastDdlTime'='1543026723');
> CREATE TABLE `lineitem`(
>   `l_orderkey` bigint, 
>   `l_partkey` bigint, 
>   `l_suppkey` bigint, 
>   `l_linenumber` int, 
>   `l_quantity` double, 
>   `l_extendedprice` double, 
>   `l_discount` double, 
>   `l_tax` double, 
>   `l_returnflag` string, 
>   `l_linestatus` string, 
>   `l_shipdate` string, 
>   `l_commitdate` string, 
>   `l_receiptdate` string, 
>   `l_shipinstruct` string, 
>   `l_shipmode` string, 
>   `l_comment` string)
> ROW FORMAT SERDE 
>   'org.apache.hadoop.hive.ql.io.orc.OrcSerde' 
> STORED AS INPUTFORMAT 
>   'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' 
> OUTPUTFORMAT 
>   'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'
> TBLPROPERTIES (
>   'bucketing_version'='2', 
>   'transactional'='true', 
>   'transactional_properties'='default', 
>   'transient_lastDdlTime'='1543027179');
> CREATE TABLE `orders`(
>   `o_orderkey` bigint, 
>   `o_custkey` bigint, 
>   `o_orderstatus` string, 
>   `o_totalprice` double, 
>   `o_orderdate` string, 
>   `o_orderpriority` string, 
>   `o_clerk` string, 
>   `o_shippriority` int, 
>   `o_comment` string)
> ROW FORMAT SERDE 
>   'org.apache.hadoop.hive.ql.io.orc.OrcSerde' 
> STORED AS INPUTFORMAT 
>   'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' 
> OUTPUTFORMAT 
>   'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'
> TBLPROPERTIES (
>   'bucketing_version'='2', 
>   'transactional'='true', 
>   'transactional_properties'='default', 
>   'transient_lastDdlTime'='1543026824');
> alter table customer update statistics 
> set('numRows'='15000','rawDataSize'='8633707142');
> alter table lineitem update statistics 
> set('numRows'='589709','rawDataSize'='184245066955');
> alter table orders update statistics 
> set('numRows'='15','rawDataSize'='46741318253');
> 

[jira] [Commented] (HIVE-21132) Semi join edge is not being removed despite max bloomfilter entries set to 1

2019-01-24 Thread Hive QA (JIRA)


[ 
https://issues.apache.org/jira/browse/HIVE-21132?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16751671#comment-16751671
 ] 

Hive QA commented on HIVE-21132:


| (x) *{color:red}-1 overall{color}* |
\\
\\
|| Vote || Subsystem || Runtime || Comment ||
|| || || || {color:brown} Prechecks {color} ||
| {color:green}+1{color} | {color:green} @author {color} | {color:green}  0m  
0s{color} | {color:green} The patch does not contain any @author tags. {color} |
|| || || || {color:brown} master Compile Tests {color} ||
| {color:blue}0{color} | {color:blue} mvndep {color} | {color:blue}  1m  
5s{color} | {color:blue} Maven dependency ordering for branch {color} |
| {color:green}+1{color} | {color:green} mvninstall {color} | {color:green}  6m 
24s{color} | {color:green} master passed {color} |
| {color:green}+1{color} | {color:green} compile {color} | {color:green}  1m  
1s{color} | {color:green} master passed {color} |
| {color:green}+1{color} | {color:green} checkstyle {color} | {color:green}  0m 
37s{color} | {color:green} master passed {color} |
| {color:blue}0{color} | {color:blue} findbugs {color} | {color:blue}  3m 
45s{color} | {color:blue} ql in master has 2304 extant Findbugs warnings. 
{color} |
| {color:green}+1{color} | {color:green} javadoc {color} | {color:green}  0m 
54s{color} | {color:green} master passed {color} |
|| || || || {color:brown} Patch Compile Tests {color} ||
| {color:blue}0{color} | {color:blue} mvndep {color} | {color:blue}  0m 
26s{color} | {color:blue} Maven dependency ordering for patch {color} |
| {color:green}+1{color} | {color:green} mvninstall {color} | {color:green}  1m 
19s{color} | {color:green} the patch passed {color} |
| {color:green}+1{color} | {color:green} compile {color} | {color:green}  0m 
59s{color} | {color:green} the patch passed {color} |
| {color:green}+1{color} | {color:green} javac {color} | {color:green}  0m 
59s{color} | {color:green} the patch passed {color} |
| {color:green}+1{color} | {color:green} checkstyle {color} | {color:green}  0m 
35s{color} | {color:green} the patch passed {color} |
| {color:red}-1{color} | {color:red} whitespace {color} | {color:red}  0m  
0s{color} | {color:red} The patch has 54 line(s) that end in whitespace. Use 
git apply --whitespace=fix <<patch_file>>. Refer 
https://git-scm.com/docs/git-apply {color} |
| {color:green}+1{color} | {color:green} findbugs {color} | {color:green}  3m 
47s{color} | {color:green} the patch passed {color} |
| {color:green}+1{color} | {color:green} javadoc {color} | {color:green}  0m 
54s{color} | {color:green} the patch passed {color} |
|| || || || {color:brown} Other Tests {color} ||
| {color:green}+1{color} | {color:green} asflicense {color} | {color:green}  0m 
13s{color} | {color:green} The patch does not generate ASF License warnings. 
{color} |
| {color:black}{color} | {color:black} {color} | {color:black} 22m 21s{color} | 
{color:black} {color} |
\\
\\
|| Subsystem || Report/Notes ||
| Optional Tests |  asflicense  javac  javadoc  findbugs  checkstyle  compile  |
| uname | Linux hiveptest-server-upstream 3.16.0-4-amd64 #1 SMP Debian 
3.16.36-1+deb8u1 (2016-09-03) x86_64 GNU/Linux |
| Build tool | maven |
| Personality | 
/data/hiveptest/working/yetus_PreCommit-HIVE-Build-15784/dev-support/hive-personality.sh
 |
| git revision | master / 1327d47 |
| Default Java | 1.8.0_111 |
| findbugs | v3.0.0 |
| whitespace | 
http://104.198.109.242/logs//PreCommit-HIVE-Build-15784/yetus/whitespace-eol.txt
 |
| modules | C: ql itests U: . |
| Console output | 
http://104.198.109.242/logs//PreCommit-HIVE-Build-15784/yetus.txt |
| Powered by | Apache Yetus   http://yetus.apache.org |


This message was automatically generated.



> Semi join edge is not being removed despite max bloomfilter entries set to 1
> 
>
> Key: HIVE-21132
> URL: https://issues.apache.org/jira/browse/HIVE-21132
> Project: Hive
> Issue Type: Bug
> Components: Query Planning
> Affects Versions: 4.0.0
> Reporter: Vineet Garg
> Assignee: Vineet Garg
> Priority: Major
> Attachments: HIVE-21132.1.patch, HIVE-21132.2.patch, 
> HIVE-21132.3.patch
>
>
> * Reproducer
> {code:sql}
> --! qt:dataset:lineitem
> --! qt:dataset:part
> --! qt:dataset:src
> set hive.support.concurrency=true;
> set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
> --set hive.compute.query.using.stats=false;
> set hive.mapred.mode=nonstrict;
> set hive.explain.user=false;
> set hive.optimize.ppd=true;
> set hive.ppd.remove.duplicatefilters=true;
> set hive.tez.dynamic.partition.pruning=true;
> set hive.tez.dynamic.semijoin.reduction=true;
> set hive.optimize.metadataonly=false;
> set hive.optimize.index.filter=true;
> set hive.stats.autogather=true;
> set hive.tez.bigtable.minsize.semijoin.reduction=1;
> set 

[jira] [Commented] (HIVE-21132) Semi join edge is not being removed despite max bloomfilter entries set to 1

2019-01-23 Thread Hive QA (JIRA)


[ 
https://issues.apache.org/jira/browse/HIVE-21132?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16750650#comment-16750650
 ] 

Hive QA commented on HIVE-21132:




Here are the results of testing the latest attachment:
https://issues.apache.org/jira/secure/attachment/12956049/HIVE-21132.2.patch

{color:green}SUCCESS:{color} +1 due to 1 test(s) being added or modified.

{color:red}ERROR:{color} -1 due to 2 failed/errored test(s), 15710 tests 
executed
*Failed tests:*
{noformat}
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[semijoin_reddedup] 
(batchId=40)
org.apache.hive.jdbc.TestSSL.testMetastoreWithSSL (batchId=260)
{noformat}

Test results: 
https://builds.apache.org/job/PreCommit-HIVE-Build/15768/testReport
Console output: https://builds.apache.org/job/PreCommit-HIVE-Build/15768/console
Test logs: http://104.198.109.242/logs/PreCommit-HIVE-Build-15768/

Messages:
{noformat}
Executing org.apache.hive.ptest.execution.TestCheckPhase
Executing org.apache.hive.ptest.execution.PrepPhase
Executing org.apache.hive.ptest.execution.YetusPhase
Executing org.apache.hive.ptest.execution.ExecutionPhase
Executing org.apache.hive.ptest.execution.ReportingPhase
Tests exited with: TestsFailedException: 2 tests failed
{noformat}

This message is automatically generated.

ATTACHMENT ID: 12956049 - PreCommit-HIVE-Build

> Semi join edge is not being removed despite max bloomfilter entries set to 1
> 
>
> Key: HIVE-21132
> URL: https://issues.apache.org/jira/browse/HIVE-21132
> Project: Hive
> Issue Type: Bug
> Components: Query Planning
> Affects Versions: 4.0.0
> Reporter: Vineet Garg
> Assignee: Vineet Garg
> Priority: Major
> Attachments: HIVE-21132.1.patch, HIVE-21132.2.patch
>
>
> * Reproducer
> {code:sql}
> --! qt:dataset:lineitem
> --! qt:dataset:part
> --! qt:dataset:src
> set hive.support.concurrency=true;
> set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
> --set hive.compute.query.using.stats=false;
> set hive.mapred.mode=nonstrict;
> set hive.explain.user=false;
> set hive.optimize.ppd=true;
> set hive.ppd.remove.duplicatefilters=true;
> set hive.tez.dynamic.partition.pruning=true;
> set hive.tez.dynamic.semijoin.reduction=true;
> set hive.optimize.metadataonly=false;
> set hive.optimize.index.filter=true;
> set hive.stats.autogather=true;
> set hive.tez.bigtable.minsize.semijoin.reduction=1;
> set hive.tez.min.bloom.filter.entries=1;
> set hive.stats.fetch.column.stats=true;
> set hive.tez.bloom.filter.factor=1.0f;
> set hive.auto.convert.join=false;
> set hive.optimize.shared.work=false;
> create database tpch_test;
> use tpch_test;
> CREATE TABLE `customer`(
>   `c_custkey` bigint, 
>   `c_name` string, 
>   `c_address` string, 
>   `c_nationkey` bigint, 
>   `c_phone` string, 
>   `c_acctbal` double, 
>   `c_mktsegment` string, 
>   `c_comment` string)
> ROW FORMAT SERDE 
>   'org.apache.hadoop.hive.ql.io.orc.OrcSerde' 
> STORED AS INPUTFORMAT 
>   'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' 
> OUTPUTFORMAT 
>   'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'
> TBLPROPERTIES (
>   'bucketing_version'='2', 
>   'transactional'='true', 
>   'transactional_properties'='default', 
>   'transient_lastDdlTime'='1543026723');
> CREATE TABLE `lineitem`(
>   `l_orderkey` bigint, 
>   `l_partkey` bigint, 
>   `l_suppkey` bigint, 
>   `l_linenumber` int, 
>   `l_quantity` double, 
>   `l_extendedprice` double, 
>   `l_discount` double, 
>   `l_tax` double, 
>   `l_returnflag` string, 
>   `l_linestatus` string, 
>   `l_shipdate` string, 
>   `l_commitdate` string, 
>   `l_receiptdate` string, 
>   `l_shipinstruct` string, 
>   `l_shipmode` string, 
>   `l_comment` string)
> ROW FORMAT SERDE 
>   'org.apache.hadoop.hive.ql.io.orc.OrcSerde' 
> STORED AS INPUTFORMAT 
>   'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' 
> OUTPUTFORMAT 
>   'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'
> TBLPROPERTIES (
>   'bucketing_version'='2', 
>   'transactional'='true', 
>   'transactional_properties'='default', 
>   'transient_lastDdlTime'='1543027179');
> CREATE TABLE `orders`(
>   `o_orderkey` bigint, 
>   `o_custkey` bigint, 
>   `o_orderstatus` string, 
>   `o_totalprice` double, 
>   `o_orderdate` string, 
>   `o_orderpriority` string, 
>   `o_clerk` string, 
>   `o_shippriority` int, 
>   `o_comment` string)
> ROW FORMAT SERDE 
>   'org.apache.hadoop.hive.ql.io.orc.OrcSerde' 
> STORED AS INPUTFORMAT 
>   'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' 
> OUTPUTFORMAT 
>   'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'
> TBLPROPERTIES (
>   'bucketing_version'='2', 
>   'transactional'='true', 
>   'transactional_properties'='default', 
>   'transient_lastDdlTime'='1543026824');
> alter table customer update statistics 
> 

[jira] [Commented] (HIVE-21132) Semi join edge is not being removed despite max bloomfilter entries set to 1

2019-01-23 Thread Hive QA (JIRA)


[ 
https://issues.apache.org/jira/browse/HIVE-21132?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16750613#comment-16750613
 ] 

Hive QA commented on HIVE-21132:


| (x) *{color:red}-1 overall{color}* |
\\
\\
|| Vote || Subsystem || Runtime || Comment ||
|| || || || {color:brown} Prechecks {color} ||
| {color:green}+1{color} | {color:green} @author {color} | {color:green}  0m  
0s{color} | {color:green} The patch does not contain any @author tags. {color} |
|| || || || {color:brown} master Compile Tests {color} ||
| {color:blue}0{color} | {color:blue} mvndep {color} | {color:blue}  1m 
46s{color} | {color:blue} Maven dependency ordering for branch {color} |
| {color:green}+1{color} | {color:green} mvninstall {color} | {color:green}  6m 
18s{color} | {color:green} master passed {color} |
| {color:green}+1{color} | {color:green} compile {color} | {color:green}  0m 
59s{color} | {color:green} master passed {color} |
| {color:green}+1{color} | {color:green} checkstyle {color} | {color:green}  0m 
35s{color} | {color:green} master passed {color} |
| {color:blue}0{color} | {color:blue} findbugs {color} | {color:blue}  3m 
39s{color} | {color:blue} ql in master has 2304 extant Findbugs warnings. 
{color} |
| {color:green}+1{color} | {color:green} javadoc {color} | {color:green}  0m 
56s{color} | {color:green} master passed {color} |
|| || || || {color:brown} Patch Compile Tests {color} ||
| {color:blue}0{color} | {color:blue} mvndep {color} | {color:blue}  0m 
26s{color} | {color:blue} Maven dependency ordering for patch {color} |
| {color:green}+1{color} | {color:green} mvninstall {color} | {color:green}  1m 
21s{color} | {color:green} the patch passed {color} |
| {color:green}+1{color} | {color:green} compile {color} | {color:green}  1m  
1s{color} | {color:green} the patch passed {color} |
| {color:green}+1{color} | {color:green} javac {color} | {color:green}  1m  
1s{color} | {color:green} the patch passed {color} |
| {color:green}+1{color} | {color:green} checkstyle {color} | {color:green}  0m 
37s{color} | {color:green} the patch passed {color} |
| {color:red}-1{color} | {color:red} whitespace {color} | {color:red}  0m  
0s{color} | {color:red} The patch has 54 line(s) that end in whitespace. Use 
git apply --whitespace=fix <<patch_file>>. Refer 
https://git-scm.com/docs/git-apply {color} |
| {color:green}+1{color} | {color:green} findbugs {color} | {color:green}  3m 
48s{color} | {color:green} the patch passed {color} |
| {color:green}+1{color} | {color:green} javadoc {color} | {color:green}  0m 
54s{color} | {color:green} the patch passed {color} |
|| || || || {color:brown} Other Tests {color} ||
| {color:green}+1{color} | {color:green} asflicense {color} | {color:green}  0m 
12s{color} | {color:green} The patch does not generate ASF License warnings. 
{color} |
| {color:black}{color} | {color:black} {color} | {color:black} 23m  3s{color} | 
{color:black} {color} |
\\
\\
|| Subsystem || Report/Notes ||
| Optional Tests |  asflicense  javac  javadoc  findbugs  checkstyle  compile  |
| uname | Linux hiveptest-server-upstream 3.16.0-4-amd64 #1 SMP Debian 
3.16.36-1+deb8u1 (2016-09-03) x86_64 GNU/Linux |
| Build tool | maven |
| Personality | 
/data/hiveptest/working/yetus_PreCommit-HIVE-Build-15768/dev-support/hive-personality.sh
 |
| git revision | master / a7e704c |
| Default Java | 1.8.0_111 |
| findbugs | v3.0.0 |
| whitespace | 
http://104.198.109.242/logs//PreCommit-HIVE-Build-15768/yetus/whitespace-eol.txt
 |
| modules | C: ql itests U: . |
| Console output | 
http://104.198.109.242/logs//PreCommit-HIVE-Build-15768/yetus.txt |
| Powered by | Apache Yetus  http://yetus.apache.org |


This message was automatically generated.



> Semi join edge is not being removed despite max bloomfilter entries set to 1
> 
>
> Key: HIVE-21132
> URL: https://issues.apache.org/jira/browse/HIVE-21132
> Project: Hive
>  Issue Type: Bug
>  Components: Query Planning
> Affects Versions: 4.0.0
> Reporter: Vineet Garg
> Assignee: Vineet Garg
> Priority: Major
> Attachments: HIVE-21132.1.patch, HIVE-21132.2.patch
>
>
> * Reproducer
> {code:sql}
> --! qt:dataset:lineitem
> --! qt:dataset:part
> --! qt:dataset:src
> set hive.support.concurrency=true;
> set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
> --set hive.compute.query.using.stats=false;
> set hive.mapred.mode=nonstrict;
> set hive.explain.user=false;
> set hive.optimize.ppd=true;
> set hive.ppd.remove.duplicatefilters=true;
> set hive.tez.dynamic.partition.pruning=true;
> set hive.tez.dynamic.semijoin.reduction=true;
> set hive.optimize.metadataonly=false;
> set hive.optimize.index.filter=true;
> set hive.stats.autogather=true;
> set hive.tez.bigtable.minsize.semijoin.reduction=1;
> set 

[jira] [Commented] (HIVE-21132) Semi join edge is not being removed despite max bloomfilter entries set to 1

2019-01-17 Thread Hive QA (JIRA)


[ 
https://issues.apache.org/jira/browse/HIVE-21132?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16745796#comment-16745796
 ] 

Hive QA commented on HIVE-21132:




Here are the results of testing the latest attachment:
https://issues.apache.org/jira/secure/attachment/12955311/HIVE-21132.1.patch

{color:green}SUCCESS:{color} +1 due to 1 test(s) being added or modified.

{color:red}ERROR:{color} -1 due to 170 failed/errored test(s), 15696 tests 
executed
*Failed tests:*
{noformat}
org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[semijoin_reddedup] 
(batchId=40)
org.apache.hadoop.hive.cli.TestMiniDruidCliDriver.testCliDriver[druidmini_expressions]
 (batchId=195)
org.apache.hadoop.hive.cli.TestMiniHiveKafkaCliDriver.testCliDriver[kafka_storage_handler]
 (batchId=275)
org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver[dynamic_partition_pruning_2]
 (batchId=155)
org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver[explainuser_2] 
(batchId=157)
org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver[global_limit] 
(batchId=155)
org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver[mapreduce1] 
(batchId=156)
org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver[mapreduce2] 
(batchId=153)
org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver[mm_all] 
(batchId=156)
org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver[mm_cttas] 
(batchId=155)
org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver[multi_count_distinct_null]
 (batchId=158)
org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver[orc_struct_type_vectorization]
 (batchId=157)
org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver[parallel_colstats]
 (batchId=155)
org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver[parquet_complex_types_vectorization]
 (batchId=157)
org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver[parquet_map_type_vectorization]
 (batchId=157)
org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver[parquet_struct_type_vectorization]
 (batchId=153)
org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver[parquet_types_vectorization]
 (batchId=153)
org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver[unionDistinct_1] 
(batchId=155)
org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[acid_vectorization_original]
 (batchId=181)
org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[auto_smb_mapjoin_14]
 (batchId=175)
org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[auto_sortmerge_join_9]
 (batchId=177)
org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[bucket_groupby]
 (batchId=183)
org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[cbo_gby_empty]
 (batchId=172)
org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[cbo_rp_limit]
 (batchId=173)
org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[cbo_rp_semijoin]
 (batchId=161)
org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[cbo_rp_udf_udaf]
 (batchId=163)
org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[cbo_rp_udf_udaf_stats_opt]
 (batchId=163)
org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[cbo_semijoin]
 (batchId=170)
org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[cbo_subq_not_in]
 (batchId=173)
org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[cbo_udf_udaf]
 (batchId=160)
org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[column_access_stats]
 (batchId=175)
org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[constraints_optimization]
 (batchId=172)
org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[correlationoptimizer1]
 (batchId=174)
org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[correlationoptimizer2]
 (batchId=172)
org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[correlationoptimizer4]
 (batchId=161)
org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[count_dist_rewrite]
 (batchId=178)
org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[cross_prod_1]
 (batchId=178)
org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[cross_prod_3]
 (batchId=159)
org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[distinct_66]
 (batchId=161)
org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[dynamic_partition_pruning]
 (batchId=167)
org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[explainanalyze_2]
 (batchId=178)
org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[explainuser_1]
 (batchId=168)
org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[groupby_resolution]
 (batchId=170)
org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[infer_bucket_sort_bucketed_table]

[jira] [Commented] (HIVE-21132) Semi join edge is not being removed despite max bloomfilter entries set to 1

2019-01-17 Thread Hive QA (JIRA)


[ 
https://issues.apache.org/jira/browse/HIVE-21132?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16745776#comment-16745776
 ] 

Hive QA commented on HIVE-21132:


| (x) *{color:red}-1 overall{color}* |
\\
\\
|| Vote || Subsystem || Runtime || Comment ||
|| || || || {color:brown} Prechecks {color} ||
| {color:green}+1{color} | {color:green} @author {color} | {color:green}  0m  
0s{color} | {color:green} The patch does not contain any @author tags. {color} |
|| || || || {color:brown} master Compile Tests {color} ||
| {color:green}+1{color} | {color:green} mvninstall {color} | {color:green}  7m 
34s{color} | {color:green} master passed {color} |
| {color:green}+1{color} | {color:green} compile {color} | {color:green}  1m  
0s{color} | {color:green} master passed {color} |
| {color:green}+1{color} | {color:green} checkstyle {color} | {color:green}  0m 
34s{color} | {color:green} master passed {color} |
| {color:blue}0{color} | {color:blue} findbugs {color} | {color:blue}  3m 
37s{color} | {color:blue} ql in master has 2310 extant Findbugs warnings. 
{color} |
| {color:green}+1{color} | {color:green} javadoc {color} | {color:green}  0m 
54s{color} | {color:green} master passed {color} |
|| || || || {color:brown} Patch Compile Tests {color} ||
| {color:green}+1{color} | {color:green} mvninstall {color} | {color:green}  1m 
20s{color} | {color:green} the patch passed {color} |
| {color:green}+1{color} | {color:green} compile {color} | {color:green}  0m 
59s{color} | {color:green} the patch passed {color} |
| {color:green}+1{color} | {color:green} javac {color} | {color:green}  0m 
59s{color} | {color:green} the patch passed {color} |
| {color:green}+1{color} | {color:green} checkstyle {color} | {color:green}  0m 
36s{color} | {color:green} the patch passed {color} |
| {color:red}-1{color} | {color:red} whitespace {color} | {color:red}  0m  
0s{color} | {color:red} The patch has 54 line(s) that end in whitespace. Use 
git apply --whitespace=fix <<patch_file>>. Refer 
https://git-scm.com/docs/git-apply {color} |
| {color:green}+1{color} | {color:green} findbugs {color} | {color:green}  3m 
46s{color} | {color:green} the patch passed {color} |
| {color:green}+1{color} | {color:green} javadoc {color} | {color:green}  0m 
54s{color} | {color:green} the patch passed {color} |
|| || || || {color:brown} Other Tests {color} ||
| {color:green}+1{color} | {color:green} asflicense {color} | {color:green}  0m 
12s{color} | {color:green} The patch does not generate ASF License warnings. 
{color} |
| {color:black}{color} | {color:black} {color} | {color:black} 21m 53s{color} | 
{color:black} {color} |
\\
\\
|| Subsystem || Report/Notes ||
| Optional Tests |  asflicense  javac  javadoc  findbugs  checkstyle  compile  |
| uname | Linux hiveptest-server-upstream 3.16.0-4-amd64 #1 SMP Debian 
3.16.36-1+deb8u1 (2016-09-03) x86_64 GNU/Linux |
| Build tool | maven |
| Personality | 
/data/hiveptest/working/yetus_PreCommit-HIVE-Build-15676/dev-support/hive-personality.sh
 |
| git revision | master / 67284b0 |
| Default Java | 1.8.0_111 |
| findbugs | v3.0.0 |
| whitespace | 
http://104.198.109.242/logs//PreCommit-HIVE-Build-15676/yetus/whitespace-eol.txt
 |
| modules | C: ql U: ql |
| Console output | 
http://104.198.109.242/logs//PreCommit-HIVE-Build-15676/yetus.txt |
| Powered by | Apache Yetus  http://yetus.apache.org |


This message was automatically generated.



> Semi join edge is not being removed despite max bloomfilter entries set to 1
> 
>
> Key: HIVE-21132
> URL: https://issues.apache.org/jira/browse/HIVE-21132
> Project: Hive
>  Issue Type: Bug
>  Components: Query Planning
> Affects Versions: 4.0.0
> Reporter: Vineet Garg
> Assignee: Vineet Garg
> Priority: Major
> Attachments: HIVE-21132.1.patch
>
>
> * Reproducer
> {code:sql}
> --! qt:dataset:lineitem
> --! qt:dataset:part
> --! qt:dataset:src
> set hive.support.concurrency=true;
> set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
> --set hive.compute.query.using.stats=false;
> set hive.mapred.mode=nonstrict;
> set hive.explain.user=false;
> set hive.optimize.ppd=true;
> set hive.ppd.remove.duplicatefilters=true;
> set hive.tez.dynamic.partition.pruning=true;
> set hive.tez.dynamic.semijoin.reduction=true;
> set hive.optimize.metadataonly=false;
> set hive.optimize.index.filter=true;
> set hive.stats.autogather=true;
> set hive.tez.bigtable.minsize.semijoin.reduction=1;
> set hive.tez.min.bloom.filter.entries=1;
> set hive.stats.fetch.column.stats=true;
> set hive.tez.bloom.filter.factor=1.0f;
> set hive.auto.convert.join=false;
> set hive.optimize.shared.work=false;
> create database tpch_test;
> use tpch_test;
> CREATE TABLE `customer`(
>   `c_custkey` bigint, 
>   `c_name` string, 
>   `c_address` string, 
>   

[jira] [Commented] (HIVE-21132) Semi join edge is not being removed despite max bloomfilter entries set to 1

2019-01-17 Thread Vineet Garg (JIRA)


[ 
https://issues.apache.org/jira/browse/HIVE-21132?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16745608#comment-16745608
 ] 

Vineet Garg commented on HIVE-21132:


The root cause is that reduce de-duplication ends up breaking the pattern of 
semi join branches (GB-RS-GB-RS).

> Semi join edge is not being removed despite max bloomfilter entries set to 1
> 
>
> Key: HIVE-21132
> URL: https://issues.apache.org/jira/browse/HIVE-21132
> Project: Hive
>  Issue Type: Bug
>  Components: Query Planning
> Affects Versions: 4.0.0
> Reporter: Vineet Garg
> Assignee: Vineet Garg
> Priority: Major
>
> * Reproducer
> {code:sql}
> --! qt:dataset:lineitem
> --! qt:dataset:part
> --! qt:dataset:src
> set hive.support.concurrency=true;
> set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
> --set hive.compute.query.using.stats=false;
> set hive.mapred.mode=nonstrict;
> set hive.explain.user=false;
> set hive.optimize.ppd=true;
> set hive.ppd.remove.duplicatefilters=true;
> set hive.tez.dynamic.partition.pruning=true;
> set hive.tez.dynamic.semijoin.reduction=true;
> set hive.optimize.metadataonly=false;
> set hive.optimize.index.filter=true;
> set hive.stats.autogather=true;
> set hive.tez.bigtable.minsize.semijoin.reduction=1;
> set hive.tez.min.bloom.filter.entries=1;
> set hive.stats.fetch.column.stats=true;
> set hive.tez.bloom.filter.factor=1.0f;
> set hive.auto.convert.join=false;
> set hive.optimize.shared.work=false;
> create database tpch_test;
> use tpch_test;
> CREATE TABLE `customer`(
>   `c_custkey` bigint, 
>   `c_name` string, 
>   `c_address` string, 
>   `c_nationkey` bigint, 
>   `c_phone` string, 
>   `c_acctbal` double, 
>   `c_mktsegment` string, 
>   `c_comment` string)
> ROW FORMAT SERDE 
>   'org.apache.hadoop.hive.ql.io.orc.OrcSerde' 
> STORED AS INPUTFORMAT 
>   'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' 
> OUTPUTFORMAT 
>   'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'
> TBLPROPERTIES (
>   'bucketing_version'='2', 
>   'transactional'='true', 
>   'transactional_properties'='default', 
>   'transient_lastDdlTime'='1543026723');
> CREATE TABLE `lineitem`(
>   `l_orderkey` bigint, 
>   `l_partkey` bigint, 
>   `l_suppkey` bigint, 
>   `l_linenumber` int, 
>   `l_quantity` double, 
>   `l_extendedprice` double, 
>   `l_discount` double, 
>   `l_tax` double, 
>   `l_returnflag` string, 
>   `l_linestatus` string, 
>   `l_shipdate` string, 
>   `l_commitdate` string, 
>   `l_receiptdate` string, 
>   `l_shipinstruct` string, 
>   `l_shipmode` string, 
>   `l_comment` string)
> ROW FORMAT SERDE 
>   'org.apache.hadoop.hive.ql.io.orc.OrcSerde' 
> STORED AS INPUTFORMAT 
>   'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' 
> OUTPUTFORMAT 
>   'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'
> TBLPROPERTIES (
>   'bucketing_version'='2', 
>   'transactional'='true', 
>   'transactional_properties'='default', 
>   'transient_lastDdlTime'='1543027179');
> CREATE TABLE `orders`(
>   `o_orderkey` bigint, 
>   `o_custkey` bigint, 
>   `o_orderstatus` string, 
>   `o_totalprice` double, 
>   `o_orderdate` string, 
>   `o_orderpriority` string, 
>   `o_clerk` string, 
>   `o_shippriority` int, 
>   `o_comment` string)
> ROW FORMAT SERDE 
>   'org.apache.hadoop.hive.ql.io.orc.OrcSerde' 
> STORED AS INPUTFORMAT 
>   'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' 
> OUTPUTFORMAT 
>   'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'
> TBLPROPERTIES (
>   'bucketing_version'='2', 
>   'transactional'='true', 
>   'transactional_properties'='default', 
>   'transient_lastDdlTime'='1543026824');
> alter table customer update statistics 
> set('numRows'='15000','rawDataSize'='8633707142');
> alter table lineitem update statistics 
> set('numRows'='589709','rawDataSize'='184245066955');
> alter table orders update statistics 
> set('numRows'='15','rawDataSize'='46741318253');
> create view q18_tmp_cached as
> select l_orderkey, sum(l_quantity) as t_sum_quantity
> from lineitem
> where l_orderkey is not null
> group by l_orderkey;
> -- Set bloom filter size to huge number so we get any possible semijoin reductions
> set hive.tez.min.bloom.filter.entries=0;
> set hive.tez.max.bloom.filter.entries=1;
> create table q18_large_volume_customer_cached stored as orc tblproperties 
> ('transactional'='true', 'transactional_properties'='default') as
> select c_name, c_custkey, o_orderkey, o_orderdate, o_totalprice, 
> sum(l_quantity)
> from customer, orders, q18_tmp_cached t, lineitem l
> where
>   c_custkey = o_custkey and o_orderkey = t.l_orderkey
>   and o_orderkey is not null and t.t_sum_quantity > 300
>   and o_orderkey = l.l_orderkey and l.l_orderkey is not null
> group by c_name, c_custkey, o_orderkey, o_orderdate, o_totalprice
> order