[jira] [Updated] (HIVE-16573) In-place update for HoS can't be disabled
[ https://issues.apache.org/jira/browse/HIVE-16573?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Rui Li updated HIVE-16573: -- Resolution: Fixed Fix Version/s: 3.0.0 Status: Resolved (was: Patch Available) Pushed to master. Thanks Bing for the contribution and Anishek for the review. > In-place update for HoS can't be disabled > - > > Key: HIVE-16573 > URL: https://issues.apache.org/jira/browse/HIVE-16573 > Project: Hive > Issue Type: Bug > Components: Spark >Reporter: Rui Li >Assignee: Bing Li >Priority: Minor > Fix For: 3.0.0 > > Attachments: HIVE-16573.1.patch > > > {{hive.spark.exec.inplace.progress}} has no effect -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Commented] (HIVE-12631) LLAP: support ORC ACID tables
[ https://issues.apache.org/jira/browse/HIVE-12631?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16040191#comment-16040191 ] Hive QA commented on HIVE-12631: Here are the results of testing the latest attachment: https://issues.apache.org/jira/secure/attachment/12871717/HIVE-12631.10.patch {color:green}SUCCESS:{color} +1 due to 3 test(s) being added or modified. {color:red}ERROR:{color} -1 due to 37 failed/errored test(s), 10821 tests executed *Failed tests:* {noformat} org.apache.hadoop.hive.cli.TestBeeLineDriver.testCliDriver[create_merge_compressed] (batchId=237) org.apache.hadoop.hive.cli.TestBeeLineDriver.testCliDriver[insert_overwrite_local_directory_1] (batchId=237) org.apache.hadoop.hive.cli.TestBeeLineDriver.testCliDriver[materialized_view_create_rewrite] (batchId=237) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[llap_reader] (batchId=7) org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver[orc_ppd_basic] (batchId=140) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[acid_globallimit] (batchId=149) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[delete_all_non_partitioned] (batchId=149) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[delete_all_partitioned] (batchId=149) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[delete_tmp_table] (batchId=154) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[delete_where_no_match] (batchId=149) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[delete_where_non_partitioned] (batchId=151) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[delete_where_partitioned] (batchId=151) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[delete_whole_partition] (batchId=145) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[dynamic_semijoin_reduction_3] (batchId=158) 
org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[dynpart_sort_optimization_acid] (batchId=152) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[insert_orig_table] (batchId=156) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[insert_update_delete] (batchId=160) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[insert_values_dynamic_partitioned] (batchId=158) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[insert_values_non_partitioned] (batchId=147) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[insert_values_partitioned] (batchId=158) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[insert_values_tmp_table] (batchId=144) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[join_acid_non_acid] (batchId=159) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[update_after_multiple_inserts] (batchId=157) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[update_all_non_partitioned] (batchId=145) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[update_all_partitioned] (batchId=154) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[update_all_types] (batchId=147) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[update_tmp_table] (batchId=151) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[update_two_cols] (batchId=147) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[update_where_no_match] (batchId=147) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[update_where_non_partitioned] (batchId=146) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[update_where_partitioned] (batchId=156) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[vector_acid3] (batchId=148) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[vector_if_expr] 
(batchId=145) org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver[explainanalyze_2] (batchId=99) org.apache.hadoop.hive.cli.TestNegativeCliDriver.testCliDriver[alter_rename_partition_failure3] (batchId=88) org.apache.hadoop.hive.cli.TestPerfCliDriver.testCliDriver[query23] (batchId=232) org.apache.hadoop.hive.cli.TestPerfCliDriver.testCliDriver[query78] (batchId=232) {noformat} Test results: https://builds.apache.org/job/PreCommit-HIVE-Build/5560/testReport Console output: https://builds.apache.org/job/PreCommit-HIVE-Build/5560/console Test logs: http://104.198.109.242/logs/PreCommit-HIVE-Build-5560/ Messages: {noformat} Executing org.apache.hive.ptest.execution.TestCheckPhase Executing org.apache.hive.ptest.execution.PrepPhase Executing org.apache.hive.ptest.execution.ExecutionPhase Executing org.apache.hive.ptest.execution.ReportingPhase Tests exited with: TestsFailedException: 37 tests failed {noformat} This message is automatically
[jira] [Comment Edited] (HIVE-16840) Investigate the performance of order by limit in HoS
[ https://issues.apache.org/jira/browse/HIVE-16840?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16040150#comment-16040150 ] liyunzhang_intel edited comment on HIVE-16840 at 6/7/17 5:09 AM: - [~xuefuz],[~lirui],[~Ferd], [~csun]: Here provide 2 solutions to solve it 1. add an extra reduce to save the result of order and a new job to finish select * from (tmp result of order) limit N. 2. create SortByLimitShuffler like [SortByShuffle|https://github.com/apache/hive/blob/master/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SortByShuffler.java], and implement order by limit N like following {code} val composite1 = sc.parallelize(1 to 200, 10).map(p=>(1-p,p)).sortByKey().zipWithIndex().filter{case (_, idx) => idx < 5} {code} sortByKey+zipWithIndex+filter to implement orderByLimit. if we use this way, we may need remove limit operator from reduce tree. Comparing option1 and option2: the disadvantage of optioin1 is we create an extra job to load the result of sort to do limit . But the sort is executed in parallel. The time of saving result of sort and loading result of sort is time consuming. the disadvantage of option2 is {{zipWithIndex}} [triggers|https://spark.apache.org/docs/2.0.1/api/java/org/apache/spark/rdd/RDD.html#zipWithIndex()] extra spark job to do when RDD contains more than one partitions. This will cause time consuming. Appreciate to get some suggestions from you. If you have any idea about it, please tell me. was (Author: kellyzly): [~xuefuz],[~lirui],[~Ferd], [~csun]: Here provide 2 solutions to solve it 1. add an extra reduce to save the result of order and a new job to finish select * from (tmp result of order) limit N. 2. 
create SortByLimitShuffler like [SortByShuffle|https://github.com/apache/hive/blob/master/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SortByShuffler.java], and implement order by limit N like following {code} val composite1 = sc.parallelize(1 to 200, 10).map(p=>(1-p,p)).sortByKey().zipWithIndex().filter{case (_, idx) => idx < 5} {code} sortByKey+zipWithIndex+filter to implement orderByLimit. if we use this way, we may need remove limit operator from reduce tree. Comparing option1 and option2: the disadvantage of optioin1 is we create an extra job to load the result of sort to do limit . But the sort is executed in parallel. the disadvantage of option2 is {{zipWithIndex}} [triggers|https://spark.apache.org/docs/2.0.1/api/java/org/apache/spark/rdd/RDD.html#zipWithIndex()] spark job to do when RDD contains more than one partitions. This will cause time consuming. Appreciate to get some suggestions from you. If you have any idea about it, please tell me. > Investigate the performance of order by limit in HoS > > > Key: HIVE-16840 > URL: https://issues.apache.org/jira/browse/HIVE-16840 > Project: Hive > Issue Type: Bug >Reporter: liyunzhang_intel >Assignee: liyunzhang_intel > > We found that on 1TB data of TPC-DS, q17 of TPC-DS hanged. 
> {code} > select i_item_id >,i_item_desc >,s_state >,count(ss_quantity) as store_sales_quantitycount >,avg(ss_quantity) as store_sales_quantityave >,stddev_samp(ss_quantity) as store_sales_quantitystdev >,stddev_samp(ss_quantity)/avg(ss_quantity) as store_sales_quantitycov >,count(sr_return_quantity) as_store_returns_quantitycount >,avg(sr_return_quantity) as_store_returns_quantityave >,stddev_samp(sr_return_quantity) as_store_returns_quantitystdev >,stddev_samp(sr_return_quantity)/avg(sr_return_quantity) as > store_returns_quantitycov >,count(cs_quantity) as catalog_sales_quantitycount ,avg(cs_quantity) > as catalog_sales_quantityave >,stddev_samp(cs_quantity)/avg(cs_quantity) as > catalog_sales_quantitystdev >,stddev_samp(cs_quantity)/avg(cs_quantity) as catalog_sales_quantitycov > from store_sales > ,store_returns > ,catalog_sales > ,date_dim d1 > ,date_dim d2 > ,date_dim d3 > ,store > ,item > where d1.d_quarter_name = '2000Q1' >and d1.d_date_sk = store_sales.ss_sold_date_sk >and item.i_item_sk = store_sales.ss_item_sk >and store.s_store_sk = store_sales.ss_store_sk >and store_sales.ss_customer_sk = store_returns.sr_customer_sk >and store_sales.ss_item_sk = store_returns.sr_item_sk >and store_sales.ss_ticket_number = store_returns.sr_ticket_number >and store_returns.sr_returned_date_sk = d2.d_date_sk >and d2.d_quarter_name in ('2000Q1','2000Q2','2000Q3') >and store_returns.sr_customer_sk = catalog_sales.cs_bill_customer_sk >and store_returns.sr_item_sk = catalog_sales.cs_item_sk >and catalog_sales.cs_sold_date_sk = d3.d_date_sk >and d3.d_quarter_name in ('2000Q1','2000Q2','2000Q3') >
[jira] [Issue Comment Deleted] (HIVE-15101) Spark client process can be stuck when UNHEALTHY NodeManager exists
[ https://issues.apache.org/jira/browse/HIVE-15101?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Ferdinand Xu updated HIVE-15101: Comment: was deleted (was: Hi, Thanks for your email. I am OOO to support demo at SPARK SUMMIT and some customer engagements. Please expect some delays in my response. Yours, Ferdinand Xu ) > Spark client process can be stuck when UNHEALTHY NodeManager exists > --- > > Key: HIVE-15101 > URL: https://issues.apache.org/jira/browse/HIVE-15101 > Project: Hive > Issue Type: Bug > Components: Spark >Affects Versions: 2.0.0, 2.1.0 > Environment: Hive 2.1.0 > Spark 1.6.2 >Reporter: Satoshi Iijima >Assignee: Satoshi Iijima > Attachments: hadoop-yarn-nodemanager.log, HIVE-15101.patch, > hive.log.gz > > > When a Hive-on-Spark job is executed on YARN environment where UNHEALTHY > NodeManager exists, Spark client can be stuck in RUNNING state. > thread dump: > {code} > "008ee7b6-b083-4ac9-ae1c-b6097d9bf761 main" #1 prio=5 os_prio=0 > tid=0x7f14f4013800 nid=0x3855 in Object.wait() [0x7f14fd9b1000] >java.lang.Thread.State: WAITING (on object monitor) > at java.lang.Object.wait(Native Method) > - waiting on <0xf6615550> (a > io.netty.util.concurrent.DefaultPromise) > at java.lang.Object.wait(Object.java:502) > at > io.netty.util.concurrent.DefaultPromise.await(DefaultPromise.java:254) > - locked <0xf6615550> (a > io.netty.util.concurrent.DefaultPromise) > at io.netty.util.concurrent.DefaultPromise.await(DefaultPromise.java:32) > at io.netty.util.concurrent.AbstractFuture.get(AbstractFuture.java:31) > at > org.apache.hive.spark.client.SparkClientImpl.(SparkClientImpl.java:104) > at > org.apache.hive.spark.client.SparkClientFactory.createClient(SparkClientFactory.java:80) > - locked <0xf21b8e08> (a java.lang.Class for > org.apache.hive.spark.client.SparkClientFactory) > at > org.apache.hadoop.hive.ql.exec.spark.RemoteHiveSparkClient.createRemoteClient(RemoteHiveSparkClient.java:99) > at > 
org.apache.hadoop.hive.ql.exec.spark.RemoteHiveSparkClient.(RemoteHiveSparkClient.java:95) > at > org.apache.hadoop.hive.ql.exec.spark.HiveSparkClientFactory.createHiveSparkClient(HiveSparkClientFactory.java:67) > at > org.apache.hadoop.hive.ql.exec.spark.session.SparkSessionImpl.open(SparkSessionImpl.java:62) > at > org.apache.hadoop.hive.ql.exec.spark.session.SparkSessionManagerImpl.getSession(SparkSessionManagerImpl.java:114) > at > org.apache.hadoop.hive.ql.exec.spark.SparkUtilities.getSparkSession(SparkUtilities.java:136) > at > org.apache.hadoop.hive.ql.exec.spark.SparkTask.execute(SparkTask.java:89) > at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:197) > at > org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:100) > at org.apache.hadoop.hive.ql.Driver.launchTask(Driver.java:1858) > at org.apache.hadoop.hive.ql.Driver.execute(Driver.java:1562) > at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:1313) > at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1084) > at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1072) > at > org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:232) > at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:183) > at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:399) > at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:335) > at > org.apache.hadoop.hive.cli.CliDriver.executeDriver(CliDriver.java:742) > at org.apache.hadoop.hive.cli.CliDriver.run(CliDriver.java:714) > at org.apache.hadoop.hive.cli.CliDriver.main(CliDriver.java:641) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:498) > at org.apache.hadoop.util.RunJar.run(RunJar.java:239) > at 
org.apache.hadoop.util.RunJar.main(RunJar.java:153) > {code} -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Comment Edited] (HIVE-16840) Investigate the performance of order by limit in HoS
[ https://issues.apache.org/jira/browse/HIVE-16840?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16040150#comment-16040150 ] liyunzhang_intel edited comment on HIVE-16840 at 6/7/17 4:59 AM: - [~xuefuz],[~lirui],[~Ferd], [~csun]: Here provide 2 solutions to solve it 1. add an extra reduce to save the result of order and a new job to finish select * from (tmp result of order) limit N. 2. create SortByLimitShuffler like [SortByShuffle|https://github.com/apache/hive/blob/master/ql/src/java/org/apache/hadoop/hive/ql/exec/spark/SortByShuffler.java], and implement order by limit N like following {code} val composite1 = sc.parallelize(1 to 200, 10).map(p=>(1-p,p)).sortByKey().zipWithIndex().filter{case (_, idx) => idx < 5} {code} sortByKey+zipWithIndex+filter to implement orderByLimit. if we use this way, we may need remove limit operator from reduce tree. Comparing option1 and option2: the disadvantage of optioin1 is we create an extra job to load the result of sort to do limit . But the sort is executed in parallel. the disadvantage of option2 is {{zipWithIndex}} [triggers|https://spark.apache.org/docs/2.0.1/api/java/org/apache/spark/rdd/RDD.html#zipWithIndex()] spark job to do when RDD contains more than one partitions. This will cause time consuming. Appreciate to get some suggestions from you. If you have any idea about it, please tell me. was (Author: kellyzly): [~xuefuz],[~lirui],[~Ferd], [~csun]: Here provide 2 solutions to solve it 1. add an extra reduce to save the result of order and a new job to finish select * from (tmp result of order) limit N. 2. create SortByLimitShuffler and implement order by limit N like following {code} val composite1 = sc.parallelize(1 to 200, 10).map(p=>(1-p,p)).sortByKey().zipWithIndex().filter{case (_, idx) => idx < 5} {code} sortByKey+zipWithIndex+filter to implement orderByLimit. if we use this way, we may need remove limit operator from reduce tree. Appreciate to get some suggestions from you. 
> Investigate the performance of order by limit in HoS > > > Key: HIVE-16840 > URL: https://issues.apache.org/jira/browse/HIVE-16840 > Project: Hive > Issue Type: Bug >Reporter: liyunzhang_intel >Assignee: liyunzhang_intel > > We found that on 1TB data of TPC-DS, q17 of TPC-DS hanged. > {code} > select i_item_id >,i_item_desc >,s_state >,count(ss_quantity) as store_sales_quantitycount >,avg(ss_quantity) as store_sales_quantityave >,stddev_samp(ss_quantity) as store_sales_quantitystdev >,stddev_samp(ss_quantity)/avg(ss_quantity) as store_sales_quantitycov >,count(sr_return_quantity) as_store_returns_quantitycount >,avg(sr_return_quantity) as_store_returns_quantityave >,stddev_samp(sr_return_quantity) as_store_returns_quantitystdev >,stddev_samp(sr_return_quantity)/avg(sr_return_quantity) as > store_returns_quantitycov >,count(cs_quantity) as catalog_sales_quantitycount ,avg(cs_quantity) > as catalog_sales_quantityave >,stddev_samp(cs_quantity)/avg(cs_quantity) as > catalog_sales_quantitystdev >,stddev_samp(cs_quantity)/avg(cs_quantity) as catalog_sales_quantitycov > from store_sales > ,store_returns > ,catalog_sales > ,date_dim d1 > ,date_dim d2 > ,date_dim d3 > ,store > ,item > where d1.d_quarter_name = '2000Q1' >and d1.d_date_sk = store_sales.ss_sold_date_sk >and item.i_item_sk = store_sales.ss_item_sk >and store.s_store_sk = store_sales.ss_store_sk >and store_sales.ss_customer_sk = store_returns.sr_customer_sk >and store_sales.ss_item_sk = store_returns.sr_item_sk >and store_sales.ss_ticket_number = store_returns.sr_ticket_number >and store_returns.sr_returned_date_sk = d2.d_date_sk >and d2.d_quarter_name in ('2000Q1','2000Q2','2000Q3') >and store_returns.sr_customer_sk = catalog_sales.cs_bill_customer_sk >and store_returns.sr_item_sk = catalog_sales.cs_item_sk >and catalog_sales.cs_sold_date_sk = d3.d_date_sk >and d3.d_quarter_name in ('2000Q1','2000Q2','2000Q3') > group by i_item_id > ,i_item_desc > ,s_state > order by i_item_id > ,i_item_desc > ,s_state > 
limit 100; > {code} > the reason why the script hanged is because we only use 1 task to implement > sort. > {code} > STAGE PLANS: > Stage: Stage-1 > Spark > Edges: > Reducer 10 <- Reducer 9 (SORT, 1) > Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 889), Map 11 > (PARTITION-LEVEL SORT, 889) > Reducer 3 <- Map 12 (PARTITION-LEVEL SORT, 1009), Reducer 2 > (PARTITION-LEVEL SORT, 1009) > Reducer 4 <- Map 13 (PARTITION-LEVEL SORT, 683), Reducer 3 > (PARTITION-LEVEL SORT, 683) > Reducer 5 <- Map 14
[jira] [Commented] (HIVE-16840) Investigate the performance of order by limit in HoS
[ https://issues.apache.org/jira/browse/HIVE-16840?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16040150#comment-16040150 ] liyunzhang_intel commented on HIVE-16840: - [~xuefuz],[~lirui],[~Ferd], [~csun]: Here provide 2 solutions to solve it 1. add an extra reduce to save the result of order and a new job to finish select * from (tmp result of order) limit N. 2. create SortByLimitShuffler and implement order by limit N like following {code} val composite1 = sc.parallelize(1 to 200, 10).map(p=>(1-p,p)).sortByKey().zipWithIndex().filter{case (_, idx) => idx < 5} {code} sortByKey+zipWithIndex+filter to implement orderByLimit. if we use this way, we may need remove limit operator from reduce tree. Appreciate to get some suggestions from you. > Investigate the performance of order by limit in HoS > > > Key: HIVE-16840 > URL: https://issues.apache.org/jira/browse/HIVE-16840 > Project: Hive > Issue Type: Bug >Reporter: liyunzhang_intel >Assignee: liyunzhang_intel > > We found that on 1TB data of TPC-DS, q17 of TPC-DS hanged. 
> {code} > select i_item_id >,i_item_desc >,s_state >,count(ss_quantity) as store_sales_quantitycount >,avg(ss_quantity) as store_sales_quantityave >,stddev_samp(ss_quantity) as store_sales_quantitystdev >,stddev_samp(ss_quantity)/avg(ss_quantity) as store_sales_quantitycov >,count(sr_return_quantity) as_store_returns_quantitycount >,avg(sr_return_quantity) as_store_returns_quantityave >,stddev_samp(sr_return_quantity) as_store_returns_quantitystdev >,stddev_samp(sr_return_quantity)/avg(sr_return_quantity) as > store_returns_quantitycov >,count(cs_quantity) as catalog_sales_quantitycount ,avg(cs_quantity) > as catalog_sales_quantityave >,stddev_samp(cs_quantity)/avg(cs_quantity) as > catalog_sales_quantitystdev >,stddev_samp(cs_quantity)/avg(cs_quantity) as catalog_sales_quantitycov > from store_sales > ,store_returns > ,catalog_sales > ,date_dim d1 > ,date_dim d2 > ,date_dim d3 > ,store > ,item > where d1.d_quarter_name = '2000Q1' >and d1.d_date_sk = store_sales.ss_sold_date_sk >and item.i_item_sk = store_sales.ss_item_sk >and store.s_store_sk = store_sales.ss_store_sk >and store_sales.ss_customer_sk = store_returns.sr_customer_sk >and store_sales.ss_item_sk = store_returns.sr_item_sk >and store_sales.ss_ticket_number = store_returns.sr_ticket_number >and store_returns.sr_returned_date_sk = d2.d_date_sk >and d2.d_quarter_name in ('2000Q1','2000Q2','2000Q3') >and store_returns.sr_customer_sk = catalog_sales.cs_bill_customer_sk >and store_returns.sr_item_sk = catalog_sales.cs_item_sk >and catalog_sales.cs_sold_date_sk = d3.d_date_sk >and d3.d_quarter_name in ('2000Q1','2000Q2','2000Q3') > group by i_item_id > ,i_item_desc > ,s_state > order by i_item_id > ,i_item_desc > ,s_state > limit 100; > {code} > the reason why the script hanged is because we only use 1 task to implement > sort. 
> {code} > STAGE PLANS: > Stage: Stage-1 > Spark > Edges: > Reducer 10 <- Reducer 9 (SORT, 1) > Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 889), Map 11 > (PARTITION-LEVEL SORT, 889) > Reducer 3 <- Map 12 (PARTITION-LEVEL SORT, 1009), Reducer 2 > (PARTITION-LEVEL SORT, 1009) > Reducer 4 <- Map 13 (PARTITION-LEVEL SORT, 683), Reducer 3 > (PARTITION-LEVEL SORT, 683) > Reducer 5 <- Map 14 (PARTITION-LEVEL SORT, 751), Reducer 4 > (PARTITION-LEVEL SORT, 751) > Reducer 6 <- Map 15 (PARTITION-LEVEL SORT, 826), Reducer 5 > (PARTITION-LEVEL SORT, 826) > Reducer 7 <- Map 16 (PARTITION-LEVEL SORT, 909), Reducer 6 > (PARTITION-LEVEL SORT, 909) > Reducer 8 <- Map 17 (PARTITION-LEVEL SORT, 1001), Reducer 7 > (PARTITION-LEVEL SORT, 1001) > Reducer 9 <- Reducer 8 (GROUP, 2) > {code} > The parallelism of Reducer 9 is 1. It is a orderby limit case so we use 1 > task to execute to ensure the correctness. But the performance is poor. -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Commented] (HIVE-16832) duplicate ROW__ID possible in multi insert into transactional table
[ https://issues.apache.org/jira/browse/HIVE-16832?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16040144#comment-16040144 ] Hive QA commented on HIVE-16832: Here are the results of testing the latest attachment: https://issues.apache.org/jira/secure/attachment/12871718/HIVE-16832.01.patch {color:green}SUCCESS:{color} +1 due to 4 test(s) being added or modified. {color:red}ERROR:{color} -1 due to 48 failed/errored test(s), 10832 tests executed *Failed tests:* {noformat} org.apache.hadoop.hive.cli.TestBeeLineDriver.testCliDriver[materialized_view_create_rewrite] (batchId=237) org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver[orc_ppd_basic] (batchId=140) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[columnstats_part_coltype] (batchId=157) org.apache.hadoop.hive.cli.TestPerfCliDriver.testCliDriver[query14] (batchId=232) org.apache.hadoop.hive.cli.TestPerfCliDriver.testCliDriver[query78] (batchId=232) org.apache.hadoop.hive.ql.io.orc.TestInputOutputFormat.testCombinationInputFormatWithAcid (batchId=261) org.apache.hadoop.hive.ql.io.orc.TestInputOutputFormat.testVectorizationWithAcid (batchId=261) org.apache.hadoop.hive.ql.io.orc.TestOrcRawRecordMerger.testEmpty (batchId=261) org.apache.hadoop.hive.ql.io.orc.TestOrcRawRecordMerger.testNewBaseAndDelta (batchId=261) org.apache.hadoop.hive.ql.io.orc.TestOrcRawRecordMerger.testRecordReaderDelta (batchId=261) org.apache.hadoop.hive.ql.io.orc.TestOrcRawRecordMerger.testRecordReaderIncompleteDelta (batchId=261) org.apache.hadoop.hive.ql.io.orc.TestOrcRawRecordMerger.testRecordReaderNewBaseAndDelta (batchId=261) org.apache.hadoop.hive.ql.io.orc.TestOrcRawRecordMerger.testRecordReaderOldBaseAndDelta (batchId=261) org.apache.hadoop.hive.ql.io.orc.TestOrcRecordUpdater.testUpdates (batchId=262) org.apache.hadoop.hive.ql.io.orc.TestOrcRecordUpdater.testWriter (batchId=262) org.apache.hadoop.hive.ql.io.orc.TestOrcRecordUpdater.testWriterTblProperties (batchId=262) 
org.apache.hadoop.hive.ql.io.orc.TestVectorizedOrcAcidRowBatchReader.testCanCreateVectorizedAcidRowBatchReaderOnSplit (batchId=261) org.apache.hadoop.hive.ql.io.orc.TestVectorizedOrcAcidRowBatchReader.testVectorizedOrcAcidRowBatchReader (batchId=261) org.apache.hadoop.hive.ql.txn.compactor.TestCompactor.majorCompactAfterAbort (batchId=214) org.apache.hadoop.hive.ql.txn.compactor.TestCompactor.majorCompactWhileStreaming (batchId=214) org.apache.hadoop.hive.ql.txn.compactor.TestCompactor.majorCompactWhileStreamingForSplitUpdate (batchId=214) org.apache.hadoop.hive.ql.txn.compactor.TestCompactor.minorCompactAfterAbort (batchId=214) org.apache.hadoop.hive.ql.txn.compactor.TestCompactor.minorCompactWhileStreaming (batchId=214) org.apache.hadoop.hive.ql.txn.compactor.TestCompactor.minorCompactWhileStreamingWithSplitUpdate (batchId=214) org.apache.hadoop.hive.ql.txn.compactor.TestCompactor.testStatsAfterCompactionPartTbl (batchId=214) org.apache.hive.hcatalog.streaming.TestStreaming.testBucketing (batchId=189) org.apache.hive.hcatalog.streaming.TestStreaming.testBucketingWhereBucketColIsNotFirstCol (batchId=189) org.apache.hive.hcatalog.streaming.TestStreaming.testConcurrentTransactionBatchCommits (batchId=189) org.apache.hive.hcatalog.streaming.TestStreaming.testErrorHandling (batchId=189) org.apache.hive.hcatalog.streaming.TestStreaming.testFileDump (batchId=189) org.apache.hive.hcatalog.streaming.TestStreaming.testFileDumpCorruptDataFiles (batchId=189) org.apache.hive.hcatalog.streaming.TestStreaming.testFileDumpCorruptSideFiles (batchId=189) org.apache.hive.hcatalog.streaming.TestStreaming.testInterleavedTransactionBatchCommits (batchId=189) org.apache.hive.hcatalog.streaming.TestStreaming.testMultipleTransactionBatchCommits (batchId=189) org.apache.hive.hcatalog.streaming.TestStreaming.testRemainingTransactions (batchId=189) org.apache.hive.hcatalog.streaming.TestStreaming.testStreamBucketingMatchesRegularBucketing (batchId=189) 
org.apache.hive.hcatalog.streaming.TestStreaming.testTransactionBatchAbort (batchId=189) org.apache.hive.hcatalog.streaming.TestStreaming.testTransactionBatchAbortAndCommit (batchId=189) org.apache.hive.hcatalog.streaming.TestStreaming.testTransactionBatchCommit_Delimited (batchId=189) org.apache.hive.hcatalog.streaming.TestStreaming.testTransactionBatchCommit_DelimitedUGI (batchId=189) org.apache.hive.hcatalog.streaming.TestStreaming.testTransactionBatchCommit_Json (batchId=189) org.apache.hive.hcatalog.streaming.TestStreaming.testTransactionBatchCommit_Regex (batchId=189) org.apache.hive.hcatalog.streaming.TestStreaming.testTransactionBatchCommit_RegexUGI (batchId=189) org.apache.hive.hcatalog.streaming.mutate.TestMutations.testMulti (batchId=189) org.apache.hive.hcatalog.streaming.mutate.TestMutations.testTransactionBatchAbort (batchId=189) org.apache.hive.hcatalog.streaming.mutate.TestMutations.testTransactionBatchCommitPartitioned
[jira] [Updated] (HIVE-16667) PostgreSQL metastore handling of CLOB types for COLUMNS_V2.TYPE_NAME and other field is incorrect
[ https://issues.apache.org/jira/browse/HIVE-16667?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Naveen Gangam updated HIVE-16667: - Status: Patch Available (was: Open) > PostgreSQL metastore handling of CLOB types for COLUMNS_V2.TYPE_NAME and > other field is incorrect > - > > Key: HIVE-16667 > URL: https://issues.apache.org/jira/browse/HIVE-16667 > Project: Hive > Issue Type: Bug >Reporter: Remus Rusanu >Assignee: Naveen Gangam > Attachments: HIVE-16667.2.patch, HIVE-16667.3.patch, > HIVE-16667.patch, HiveCLIOutput.txt, PostgresDBOutput.txt > > > The CLOB JDO type introduced with HIVE-12274 does not work correctly with > PostgreSQL. The value is written out-of-band and the LOB handle is written, as > an INT, into the table. SELECTs return the INT value, which should have been > read via the {{lo_get}} PG built-in, and then cast into string. > Furthermore, the behavior is different between fields upgraded from earlier > metastore versions (they retain their string storage) vs. values inserted > after the upgrade (inserted as LOB roots). > The code in > {{MetaStoreDirectSql.getPartitionsFromPartitionIds/extractSqlClob}} expects > the underlying JDO/Datanucleus to map the column to a {{Clob}} but that does > not happen, the value is a Java String containing the int which is the LOB > root saved by PG. > This manifests at runtime with errors like: > {code} > hive> select * from srcpart; > Failed with exception java.io.IOException:java.lang.IllegalArgumentException: > Error: type expected at the position 0 of '24030:24031' but '24030' is found. > {code} > the 24030:24031 should be 'string:string'. 
> repro: > {code} > CREATE TABLE srcpart (key STRING COMMENT 'default', value STRING COMMENT > 'default') PARTITIONED BY (ds STRING, hr STRING) STORED AS TEXTFILE; > LOAD DATA LOCAL INPATH "${hiveconf:test.data.dir}/kv1.txt" OVERWRITE INTO > TABLE srcpart PARTITION (ds="2008-04-09", hr="11"); > select * from srcpart; > {code} > I did not see the issue being hit by non-partitioned/textfile tables, but > that is just the luck of the path taken by the code. Inspection of my PG > metastore shows all the CLOB fields suffering from this issue. -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Commented] (HIVE-16667) PostgreSQL metastore handling of CLOB types for COLUMNS_V2.TYPE_NAME and other field is incorrect
[ https://issues.apache.org/jira/browse/HIVE-16667?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16040097#comment-16040097 ] Naveen Gangam commented on HIVE-16667: -- The unit test failures are related to the patch. I had not specified VARCHAR length in the mapping hoping that DataNucleus would map it to the max length permitted for {{java.lang.String}}. Based on the error message in the test failures {{Caused by: java.sql.SQLDataException: A truncation error was encountered trying to shrink VARCHAR '{"BASIC_STATS":"true","COLUMN_STATS":{"c_bigint":"true","c_b&' to length 255}} it appears that it is attempting to limit to 255 which is too short. I then tried testing with max length which also caused problems with DERBY as it checks the length https://fossies.org/dox/db-derby-10.13.1.1-src/interfaceorg_1_1apache_1_1derby_1_1iapi_1_1reference_1_1Limits.html#a4e223303a7751ae27dfdd8e62e993588 So for now I think we are kinda limited to ~32k for max value. As this is an improvement compared to where we were at and only a limitation for derby, we should use this value for this release and fix the postgres issue in the next release. I am uploading a patch with a length in the JDO mapping file. > PostgreSQL metastore handling of CLOB types for COLUMNS_V2.TYPE_NAME and > other field is incorrect > - > > Key: HIVE-16667 > URL: https://issues.apache.org/jira/browse/HIVE-16667 > Project: Hive > Issue Type: Bug >Reporter: Remus Rusanu >Assignee: Naveen Gangam > Attachments: HIVE-16667.2.patch, HIVE-16667.3.patch, > HIVE-16667.patch, HiveCLIOutput.txt, PostgresDBOutput.txt > > > The CLOB JDO type introduced with HIVE-12274 does not work correctly with > PostgreSQL. The value is written out-of-band and the LOB handle is written, as > an INT, into the table. SELECTs return the INT value, which should have been > read via the {{lo_get}} PG built-in, and then cast into string. 
> Furthermore, the behavior is different between fields upgraded from earlier > metastore versions (they retain their string storage) vs. values inserted > after the upgrade (inserted as LOB roots). > The code in > {{MetaStoreDirectSql.getPartitionsFromPartitionIds/extractSqlClob}} expects > the underlying JDO/Datanucleus to map the column to a {{Clob}} but that does > not happen; the value is a Java String containing the int which is the LOB > root saved by PG. > This manifests at runtime with errors like: > {code} > hive> select * from srcpart; > Failed with exception java.io.IOException:java.lang.IllegalArgumentException: > Error: type expected at the position 0 of '24030:24031' but '24030' is found. > {code} > the 24030:24031 should be 'string:string'. > repro: > {code} > CREATE TABLE srcpart (key STRING COMMENT 'default', value STRING COMMENT > 'default') PARTITIONED BY (ds STRING, hr STRING) STORED AS TEXTFILE; > LOAD DATA LOCAL INPATH "${hiveconf:test.data.dir}/kv1.txt" OVERWRITE INTO > TABLE srcpart PARTITION (ds="2008-04-09", hr="11"); > select * from srcpart; > {code} > I did not see the issue being hit by non-partitioned/textfile tables, but > that is just the luck of the path taken by the code. Inspection of my PG > metastore shows all the CLOB fields suffering from this issue. -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Updated] (HIVE-16667) PostgreSQL metastore handling of CLOB types for COLUMNS_V2.TYPE_NAME and other field is incorrect
[ https://issues.apache.org/jira/browse/HIVE-16667?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Naveen Gangam updated HIVE-16667: - Attachment: HIVE-16667.3.patch > PostgreSQL metastore handling of CLOB types for COLUMNS_V2.TYPE_NAME and > other field is incorrect > - > > Key: HIVE-16667 > URL: https://issues.apache.org/jira/browse/HIVE-16667 > Project: Hive > Issue Type: Bug >Reporter: Remus Rusanu >Assignee: Naveen Gangam > Attachments: HIVE-16667.2.patch, HIVE-16667.3.patch, > HIVE-16667.patch, HiveCLIOutput.txt, PostgresDBOutput.txt > > > The CLOB JDO type introduced with HIVE-12274 does not work correctly with > PostgreSQL. The value is written out-of-band and the LOB handle is written, as > an INT, into the table. SELECTs return the INT value, which should have been > read via the {{lo_get}} PG built-in, and then cast into string. > Furthermore, the behavior is different between fields upgraded from earlier > metastore versions (they retain their string storage) vs. values inserted > after the upgrade (inserted as LOB roots). > The code in > {{MetaStoreDirectSql.getPartitionsFromPartitionIds/extractSqlClob}} expects > the underlying JDO/Datanucleus to map the column to a {{Clob}} but that does > not happen; the value is a Java String containing the int which is the LOB > root saved by PG. > This manifests at runtime with errors like: > {code} > hive> select * from srcpart; > Failed with exception java.io.IOException:java.lang.IllegalArgumentException: > Error: type expected at the position 0 of '24030:24031' but '24030' is found. > {code} > the 24030:24031 should be 'string:string'. 
> repro: > {code} > CREATE TABLE srcpart (key STRING COMMENT 'default', value STRING COMMENT > 'default') PARTITIONED BY (ds STRING, hr STRING) STORED AS TEXTFILE; > LOAD DATA LOCAL INPATH "${hiveconf:test.data.dir}/kv1.txt" OVERWRITE INTO > TABLE srcpart PARTITION (ds="2008-04-09", hr="11"); > select * from srcpart; > {code} > I did not see the issue being hit by non-partitioned/textfile tables, but > that is just the luck of the path taken by the code. Inspection of my PG > metastore shows all the CLOB fields suffering from this issue. -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Updated] (HIVE-16667) PostgreSQL metastore handling of CLOB types for COLUMNS_V2.TYPE_NAME and other field is incorrect
[ https://issues.apache.org/jira/browse/HIVE-16667?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Naveen Gangam updated HIVE-16667: - Status: Open (was: Patch Available) > PostgreSQL metastore handling of CLOB types for COLUMNS_V2.TYPE_NAME and > other field is incorrect > - > > Key: HIVE-16667 > URL: https://issues.apache.org/jira/browse/HIVE-16667 > Project: Hive > Issue Type: Bug >Reporter: Remus Rusanu >Assignee: Naveen Gangam > Attachments: HIVE-16667.2.patch, HIVE-16667.3.patch, > HIVE-16667.patch, HiveCLIOutput.txt, PostgresDBOutput.txt > > > The CLOB JDO type introduced with HIVE-12274 does not work correctly with > PostgreSQL. The value is written out-of-band and the LOB handle is written, as > an INT, into the table. SELECTs return the INT value, which should have been > read via the {{lo_get}} PG built-in, and then cast into string. > Furthermore, the behavior is different between fields upgraded from earlier > metastore versions (they retain their string storage) vs. values inserted > after the upgrade (inserted as LOB roots). > The code in > {{MetaStoreDirectSql.getPartitionsFromPartitionIds/extractSqlClob}} expects > the underlying JDO/Datanucleus to map the column to a {{Clob}} but that does > not happen; the value is a Java String containing the int which is the LOB > root saved by PG. > This manifests at runtime with errors like: > {code} > hive> select * from srcpart; > Failed with exception java.io.IOException:java.lang.IllegalArgumentException: > Error: type expected at the position 0 of '24030:24031' but '24030' is found. > {code} > the 24030:24031 should be 'string:string'. 
> repro: > {code} > CREATE TABLE srcpart (key STRING COMMENT 'default', value STRING COMMENT > 'default') PARTITIONED BY (ds STRING, hr STRING) STORED AS TEXTFILE; > LOAD DATA LOCAL INPATH "${hiveconf:test.data.dir}/kv1.txt" OVERWRITE INTO > TABLE srcpart PARTITION (ds="2008-04-09", hr="11"); > select * from srcpart; > {code} > I did not see the issue being hit by non-partitioned/textfile tables, but > that is just the luck of the path taken by the code. Inspection of my PG > metastore shows all the CLOB fields suffering from this issue. -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Commented] (HIVE-16831) Add unit tests for NPE fixes in HIVE-12054
[ https://issues.apache.org/jira/browse/HIVE-16831?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16040084#comment-16040084 ] Hive QA commented on HIVE-16831: Here are the results of testing the latest attachment: https://issues.apache.org/jira/secure/attachment/12871715/HIVE-16831.2.patch {color:green}SUCCESS:{color} +1 due to 1 test(s) being added or modified. {color:red}ERROR:{color} -1 due to 6 failed/errored test(s), 10815 tests executed *Failed tests:* {noformat} org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver[orc_ppd_basic] (batchId=140) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[vector_if_expr] (batchId=145) org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver[explainanalyze_2] (batchId=99) org.apache.hadoop.hive.cli.TestPerfCliDriver.testCliDriver[query23] (batchId=232) org.apache.hadoop.hive.cli.TestPerfCliDriver.testCliDriver[query78] (batchId=232) org.apache.hadoop.hive.cli.TestSparkNegativeCliDriver.org.apache.hadoop.hive.cli.TestSparkNegativeCliDriver (batchId=239) {noformat} Test results: https://builds.apache.org/job/PreCommit-HIVE-Build/5558/testReport Console output: https://builds.apache.org/job/PreCommit-HIVE-Build/5558/console Test logs: http://104.198.109.242/logs/PreCommit-HIVE-Build-5558/ Messages: {noformat} Executing org.apache.hive.ptest.execution.TestCheckPhase Executing org.apache.hive.ptest.execution.PrepPhase Executing org.apache.hive.ptest.execution.ExecutionPhase Executing org.apache.hive.ptest.execution.ReportingPhase Tests exited with: TestsFailedException: 6 tests failed {noformat} This message is automatically generated. 
ATTACHMENT ID: 12871715 - PreCommit-HIVE-Build > Add unit tests for NPE fixes in HIVE-12054 > -- > > Key: HIVE-16831 > URL: https://issues.apache.org/jira/browse/HIVE-16831 > Project: Hive > Issue Type: Bug > Components: Hive >Reporter: Sunitha Beeram >Assignee: Sunitha Beeram > Attachments: HIVE-16831.1.patch, HIVE-16831.2.patch > > > HIVE-12054 fixed NPE issues related to ObjectInspector which get triggered > when an empty ORC table/partition is read. > This work adds tests that trigger that path. -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Assigned] (HIVE-16840) Investigate the performance of order by limit in HoS
[ https://issues.apache.org/jira/browse/HIVE-16840?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] liyunzhang_intel reassigned HIVE-16840: --- > Investigate the performance of order by limit in HoS > > > Key: HIVE-16840 > URL: https://issues.apache.org/jira/browse/HIVE-16840 > Project: Hive > Issue Type: Bug >Reporter: liyunzhang_intel >Assignee: liyunzhang_intel > > We found that on 1TB data of TPC-DS, q17 of TPC-DS hanged. > {code} > select i_item_id >,i_item_desc >,s_state >,count(ss_quantity) as store_sales_quantitycount >,avg(ss_quantity) as store_sales_quantityave >,stddev_samp(ss_quantity) as store_sales_quantitystdev >,stddev_samp(ss_quantity)/avg(ss_quantity) as store_sales_quantitycov >,count(sr_return_quantity) as_store_returns_quantitycount >,avg(sr_return_quantity) as_store_returns_quantityave >,stddev_samp(sr_return_quantity) as_store_returns_quantitystdev >,stddev_samp(sr_return_quantity)/avg(sr_return_quantity) as > store_returns_quantitycov >,count(cs_quantity) as catalog_sales_quantitycount ,avg(cs_quantity) > as catalog_sales_quantityave >,stddev_samp(cs_quantity)/avg(cs_quantity) as > catalog_sales_quantitystdev >,stddev_samp(cs_quantity)/avg(cs_quantity) as catalog_sales_quantitycov > from store_sales > ,store_returns > ,catalog_sales > ,date_dim d1 > ,date_dim d2 > ,date_dim d3 > ,store > ,item > where d1.d_quarter_name = '2000Q1' >and d1.d_date_sk = store_sales.ss_sold_date_sk >and item.i_item_sk = store_sales.ss_item_sk >and store.s_store_sk = store_sales.ss_store_sk >and store_sales.ss_customer_sk = store_returns.sr_customer_sk >and store_sales.ss_item_sk = store_returns.sr_item_sk >and store_sales.ss_ticket_number = store_returns.sr_ticket_number >and store_returns.sr_returned_date_sk = d2.d_date_sk >and d2.d_quarter_name in ('2000Q1','2000Q2','2000Q3') >and store_returns.sr_customer_sk = catalog_sales.cs_bill_customer_sk >and store_returns.sr_item_sk = catalog_sales.cs_item_sk >and catalog_sales.cs_sold_date_sk = 
d3.d_date_sk >and d3.d_quarter_name in ('2000Q1','2000Q2','2000Q3') > group by i_item_id > ,i_item_desc > ,s_state > order by i_item_id > ,i_item_desc > ,s_state > limit 100; > {code} > The reason the script hung is that we only use 1 task to implement > sort. > {code} > STAGE PLANS: > Stage: Stage-1 > Spark > Edges: > Reducer 10 <- Reducer 9 (SORT, 1) > Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 889), Map 11 > (PARTITION-LEVEL SORT, 889) > Reducer 3 <- Map 12 (PARTITION-LEVEL SORT, 1009), Reducer 2 > (PARTITION-LEVEL SORT, 1009) > Reducer 4 <- Map 13 (PARTITION-LEVEL SORT, 683), Reducer 3 > (PARTITION-LEVEL SORT, 683) > Reducer 5 <- Map 14 (PARTITION-LEVEL SORT, 751), Reducer 4 > (PARTITION-LEVEL SORT, 751) > Reducer 6 <- Map 15 (PARTITION-LEVEL SORT, 826), Reducer 5 > (PARTITION-LEVEL SORT, 826) > Reducer 7 <- Map 16 (PARTITION-LEVEL SORT, 909), Reducer 6 > (PARTITION-LEVEL SORT, 909) > Reducer 8 <- Map 17 (PARTITION-LEVEL SORT, 1001), Reducer 7 > (PARTITION-LEVEL SORT, 1001) > Reducer 9 <- Reducer 8 (GROUP, 2) > {code} > The parallelism of Reducer 9 is 1. It is an order-by-limit case, so we use 1 > task to execute to ensure the correctness. But the performance is poor. -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Commented] (HIVE-15144) JSON.org license is now CatX
[ https://issues.apache.org/jira/browse/HIVE-15144?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16040027#comment-16040027 ] Hive QA commented on HIVE-15144: Here are the results of testing the latest attachment: https://issues.apache.org/jira/secure/attachment/12871705/HIVE-15144.patch {color:red}ERROR:{color} -1 due to no test(s) being added or modified. {color:red}ERROR:{color} -1 due to 4 failed/errored test(s), 10820 tests executed *Failed tests:* {noformat} org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver[orc_ppd_basic] (batchId=140) org.apache.hadoop.hive.cli.TestPerfCliDriver.testCliDriver[query14] (batchId=232) org.apache.hadoop.hive.cli.TestPerfCliDriver.testCliDriver[query23] (batchId=232) org.apache.hadoop.hive.cli.TestPerfCliDriver.testCliDriver[query78] (batchId=232) {noformat} Test results: https://builds.apache.org/job/PreCommit-HIVE-Build/5557/testReport Console output: https://builds.apache.org/job/PreCommit-HIVE-Build/5557/console Test logs: http://104.198.109.242/logs/PreCommit-HIVE-Build-5557/ Messages: {noformat} Executing org.apache.hive.ptest.execution.TestCheckPhase Executing org.apache.hive.ptest.execution.PrepPhase Executing org.apache.hive.ptest.execution.ExecutionPhase Executing org.apache.hive.ptest.execution.ReportingPhase Tests exited with: TestsFailedException: 4 tests failed {noformat} This message is automatically generated. ATTACHMENT ID: 12871705 - PreCommit-HIVE-Build > JSON.org license is now CatX > > > Key: HIVE-15144 > URL: https://issues.apache.org/jira/browse/HIVE-15144 > Project: Hive > Issue Type: Bug >Reporter: Robert Kanter >Assignee: Owen O'Malley >Priority: Blocker > Fix For: 2.2.0 > > Attachments: HIVE-15144.patch, HIVE-15144.patch, HIVE-15144.patch, > HIVE-15144.patch > > > per [update resolved legal|http://www.apache.org/legal/resolved.html#json]: > {quote} > CAN APACHE PRODUCTS INCLUDE WORKS LICENSED UNDER THE JSON LICENSE? > No. 
As of 2016-11-03 this has been moved to the 'Category X' license list. > Prior to this, use of the JSON Java library was allowed. See Debian's page > for a list of alternatives. > {quote} > I'm not sure when this dependency was first introduced, but it looks like > it's currently used in a few places: > https://github.com/apache/hive/search?p=1=%22org.json%22=%E2%9C%93 -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Commented] (HIVE-16839) Unbalanced calls to openTransaction/commitTransaction when alter the same partition concurrently
[ https://issues.apache.org/jira/browse/HIVE-16839?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16040007#comment-16040007 ] Nemon Lou commented on HIVE-16839: -- Seems that we need a rollbackTransaction in method getPartition for ObjectStore: {code:java} @Override public Partition getPartition(String dbName, String tableName, List part_vals) throws NoSuchObjectException, MetaException { openTransaction(); Partition part = convertToPart(getMPartition(dbName, tableName, part_vals)); commitTransaction(); if(part == null) { throw new NoSuchObjectException("partition values=" + part_vals.toString()); } part.setValues(part_vals); return part; } {code} > Unbalanced calls to openTransaction/commitTransaction when alter the same > partition concurrently > > > Key: HIVE-16839 > URL: https://issues.apache.org/jira/browse/HIVE-16839 > Project: Hive > Issue Type: Bug >Affects Versions: 1.1.0 >Reporter: Nemon Lou > > SQL to reproduce: > prepare: > {noformat} > hdfs dfs -mkdir -p > /hzsrc/external/writing_dc/ltgsm/16e7a9b2-21a1-3f4f-8061-bc3395281627 > 1,create external table tb_ltgsm_external (id int) PARTITIONED by (cp > string,ld string); > {noformat} > open one beeline run these two sql many times > {noformat} 2,ALTER TABLE tb_ltgsm_external ADD IF NOT EXISTS PARTITION > (cp=2017060513,ld=2017060610); > 3,ALTER TABLE tb_ltgsm_external PARTITION (cp=2017060513,ld=2017060610) SET > LOCATION > 'hdfs://hacluster/hzsrc/external/writing_dc/ltgsm/16e7a9b2-21a1-3f4f-8061-bc3395281627'; > {noformat} > open another beeline to run this sql many times at the same time. 
> {noformat} > 4,ALTER TABLE tb_ltgsm_external DROP PARTITION (cp=2017060513,ld=2017060610); > {noformat} > MetaStore logs: > {noformat} > 2017-06-06 21:58:34,213 | ERROR | pool-6-thread-197 | Retrying HMSHandler > after 2000 ms (attempt 1 of 10) with error: > javax.jdo.JDOObjectNotFoundException: No such database row > FailedObject:49[OID]org.apache.hadoop.hive.metastore.model.MStorageDescriptor > at > org.datanucleus.api.jdo.NucleusJDOHelper.getJDOExceptionForNucleusException(NucleusJDOHelper.java:475) > at > org.datanucleus.api.jdo.JDOAdapter.getApiExceptionForNucleusException(JDOAdapter.java:1158) > at > org.datanucleus.state.JDOStateManager.isLoaded(JDOStateManager.java:3231) > at > org.apache.hadoop.hive.metastore.model.MStorageDescriptor.jdoGetcd(MStorageDescriptor.java) > at > org.apache.hadoop.hive.metastore.model.MStorageDescriptor.getCD(MStorageDescriptor.java:184) > at > org.apache.hadoop.hive.metastore.ObjectStore.convertToStorageDescriptor(ObjectStore.java:1282) > at > org.apache.hadoop.hive.metastore.ObjectStore.convertToStorageDescriptor(ObjectStore.java:1299) > at > org.apache.hadoop.hive.metastore.ObjectStore.convertToPart(ObjectStore.java:1680) > at > org.apache.hadoop.hive.metastore.ObjectStore.getPartition(ObjectStore.java:1586) > at sun.reflect.GeneratedMethodAccessor35.invoke(Unknown Source) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:497) > at > org.apache.hadoop.hive.metastore.RawStoreProxy.invoke(RawStoreProxy.java:98) > at com.sun.proxy.$Proxy0.getPartition(Unknown Source) > at > org.apache.hadoop.hive.metastore.HiveAlterHandler.alterPartitions(HiveAlterHandler.java:538) > at > org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.alter_partitions(HiveMetaStore.java:3317) > at sun.reflect.GeneratedMethodAccessor37.invoke(Unknown Source) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at 
java.lang.reflect.Method.invoke(Method.java:497) > at > org.apache.hadoop.hive.metastore.RetryingHMSHandler.invoke(RetryingHMSHandler.java:102) > at com.sun.proxy.$Proxy12.alter_partitions(Unknown Source) > at > org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$Processor$alter_partitions.getResult(ThriftHiveMetastore.java:9963) > at > org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$Processor$alter_partitions.getResult(ThriftHiveMetastore.java:9947) > at org.apache.thrift.ProcessFunction.process(ProcessFunction.java:39) > at > org.apache.hadoop.hive.metastore.TUGIBasedProcessor$1.run(TUGIBasedProcessor.java:110) > at > org.apache.hadoop.hive.metastore.TUGIBasedProcessor$1.run(TUGIBasedProcessor.java:106) > at java.security.AccessController.doPrivileged(Native Method) > at javax.security.auth.Subject.doAs(Subject.java:422) > at >
[jira] [Updated] (HIVE-16823) "ArrayIndexOutOfBoundsException" in spark_vectorized_dynamic_partition_pruning.q
[ https://issues.apache.org/jira/browse/HIVE-16823?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Jianguo Tian updated HIVE-16823: Description: spark_vectorized_dynamic_partition_pruning.q {code} set hive.optimize.ppd=true; set hive.ppd.remove.duplicatefilters=true; set hive.spark.dynamic.partition.pruning=true; set hive.optimize.metadataonly=false; set hive.optimize.index.filter=true; set hive.vectorized.execution.enabled=true; set hive.strict.checks.cartesian.product=false; -- parent is reduce tasks select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08'; {code} The exceptions are as follows: {code} 2017-06-05T09:20:31,468 ERROR [Executor task launch worker-0] spark.SparkReduceRecordHandler: Fatal error: org.apache.hadoop.hive.ql.metadata.HiveException: Error while processing vector batch (tag=0) Column vector types: 0:BYTES, 1:BYTES ["2008-04-08", "2008-04-08"] org.apache.hadoop.hive.ql.metadata.HiveException: Error while processing vector batch (tag=0) Column vector types: 0:BYTES, 1:BYTES ["2008-04-08", "2008-04-08"] at org.apache.hadoop.hive.ql.exec.spark.SparkReduceRecordHandler.processVectors(SparkReduceRecordHandler.java:413) ~[hive-exec-3.0.0-SNAPSHOT.jar:3.0.0-SNAPSHOT] at org.apache.hadoop.hive.ql.exec.spark.SparkReduceRecordHandler.processRow(SparkReduceRecordHandler.java:301) ~[hive-exec-3.0.0-SNAPSHOT.jar:3.0.0-SNAPSHOT] at org.apache.hadoop.hive.ql.exec.spark.HiveReduceFunctionResultList.processNextRecord(HiveReduceFunctionResultList.java:54) ~[hive-exec-3.0.0-SNAPSHOT.jar:3.0.0-SNAPSHOT] at org.apache.hadoop.hive.ql.exec.spark.HiveReduceFunctionResultList.processNextRecord(HiveReduceFunctionResultList.java:28) ~[hive-exec-3.0.0-SNAPSHOT.jar:3.0.0-SNAPSHOT] at org.apache.hadoop.hive.ql.exec.spark.HiveBaseFunctionResultList.hasNext(HiveBaseFunctionResultList.java:85) ~[hive-exec-3.0.0-SNAPSHOT.jar:3.0.0-SNAPSHOT] at 
scala.collection.convert.Wrappers$JIteratorWrapper.hasNext(Wrappers.scala:42) ~[scala-library-2.11.8.jar:?] at scala.collection.Iterator$class.foreach(Iterator.scala:893) ~[scala-library-2.11.8.jar:?] at scala.collection.AbstractIterator.foreach(Iterator.scala:1336) ~[scala-library-2.11.8.jar:?] at org.apache.spark.rdd.AsyncRDDActions$$anonfun$foreachAsync$1$$anonfun$apply$12.apply(AsyncRDDActions.scala:127) ~[spark-core_2.11-2.0.0.jar:2.0.0] at org.apache.spark.rdd.AsyncRDDActions$$anonfun$foreachAsync$1$$anonfun$apply$12.apply(AsyncRDDActions.scala:127) ~[spark-core_2.11-2.0.0.jar:2.0.0] at org.apache.spark.SparkContext$$anonfun$33.apply(SparkContext.scala:1974) ~[spark-core_2.11-2.0.0.jar:2.0.0] at org.apache.spark.SparkContext$$anonfun$33.apply(SparkContext.scala:1974) ~[spark-core_2.11-2.0.0.jar:2.0.0] at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:70) ~[spark-core_2.11-2.0.0.jar:2.0.0] at org.apache.spark.scheduler.Task.run(Task.scala:85) ~[spark-core_2.11-2.0.0.jar:2.0.0] at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:274) ~[spark-core_2.11-2.0.0.jar:2.0.0] at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) [?:1.8.0_112] at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) [?:1.8.0_112] at java.lang.Thread.run(Thread.java:745) [?:1.8.0_112] Caused by: java.lang.ArrayIndexOutOfBoundsException: 1 at org.apache.hadoop.hive.ql.exec.vector.VectorGroupKeyHelper.copyGroupKey(VectorGroupKeyHelper.java:107) ~[hive-exec-3.0.0-SNAPSHOT.jar:3.0.0-SNAPSHOT] at org.apache.hadoop.hive.ql.exec.vector.VectorGroupByOperator$ProcessingModeReduceMergePartial.doProcessBatch(VectorGroupByOperator.java:832) ~[hive-exec-3.0.0-SNAPSHOT.jar:3.0.0-SNAPSHOT] at org.apache.hadoop.hive.ql.exec.vector.VectorGroupByOperator$ProcessingModeBase.processBatch(VectorGroupByOperator.java:179) ~[hive-exec-3.0.0-SNAPSHOT.jar:3.0.0-SNAPSHOT] at 
org.apache.hadoop.hive.ql.exec.vector.VectorGroupByOperator.process(VectorGroupByOperator.java:1035) ~[hive-exec-3.0.0-SNAPSHOT.jar:3.0.0-SNAPSHOT] at org.apache.hadoop.hive.ql.exec.spark.SparkReduceRecordHandler.processVectors(SparkReduceRecordHandler.java:400) ~[hive-exec-3.0.0-SNAPSHOT.jar:3.0.0-SNAPSHOT] ... 17 more 2017-06-05T09:20:31,472 ERROR [Executor task launch worker-0] executor.Executor: Exception in task 2.0 in stage 1.0 (TID 8) java.lang.RuntimeException: org.apache.hadoop.hive.ql.metadata.HiveException: Error while processing vector batch (tag=0) Column vector types: 0:BYTES, 1:BYTES ["2008-04-08", "2008-04-08"] at org.apache.hadoop.hive.ql.exec.spark.SparkReduceRecordHandler.processRow(SparkReduceRecordHandler.java:315)
[jira] [Comment Edited] (HIVE-16823) "ArrayIndexOutOfBoundsException" in spark_vectorized_dynamic_partition_pruning.q
[ https://issues.apache.org/jira/browse/HIVE-16823?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16039995#comment-16039995 ] Jianguo Tian edited comment on HIVE-16823 at 6/7/17 1:58 AM: - Hi, [~mmccline]. This exception was indeed triggered by [-HIVE-16273-|https://issues.apache.org/jira/browse/HIVE-16273], if I build Hive with code before this patch, this exception won't occur. Any comments and suggestion will be appreciated. Thx! And in my opinion, it would be better to add some detailed description for [-HIVE-16273-|https://issues.apache.org/jira/browse/HIVE-16273]. was (Author: jonnyr): Hi, [~mmccline]. This exception was indeed triggered by [-HIVE-16273-|https://issues.apache.org/jira/browse/HIVE-16273], if I build Hive with code before this patch, this exception won't occur. Any comments and suggestion will be appreciated. Thx! > "ArrayIndexOutOfBoundsException" in > spark_vectorized_dynamic_partition_pruning.q > > > Key: HIVE-16823 > URL: https://issues.apache.org/jira/browse/HIVE-16823 > Project: Hive > Issue Type: Bug >Reporter: Jianguo Tian > > script.q > {code} > set hive.optimize.ppd=true; > set hive.ppd.remove.duplicatefilters=true; > set hive.spark.dynamic.partition.pruning=true; > set hive.optimize.metadataonly=false; > -- set hive.optimize.index.filter=true; > set hive.vectorized.execution.enabled=true; > set hive.strict.checks.cartesian.product=false; > -- parent is reduce tasks > select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart > group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08'; > {code} > The exceptions are as follows: > {code} > 2017-06-05T09:20:31,468 ERROR [Executor task launch worker-0] > spark.SparkReduceRecordHandler: Fatal error: > org.apache.hadoop.hive.ql.metadata.HiveException: Error while processing > vector batch (tag=0) Column vector types: 0:BYTES, 1:BYTES > ["2008-04-08", "2008-04-08"] > org.apache.hadoop.hive.ql.metadata.HiveException: Error while processing > 
vector batch (tag=0) Column vector types: 0:BYTES, 1:BYTES > ["2008-04-08", "2008-04-08"] > at > org.apache.hadoop.hive.ql.exec.spark.SparkReduceRecordHandler.processVectors(SparkReduceRecordHandler.java:413) > ~[hive-exec-3.0.0-SNAPSHOT.jar:3.0.0-SNAPSHOT] > at > org.apache.hadoop.hive.ql.exec.spark.SparkReduceRecordHandler.processRow(SparkReduceRecordHandler.java:301) > ~[hive-exec-3.0.0-SNAPSHOT.jar:3.0.0-SNAPSHOT] > at > org.apache.hadoop.hive.ql.exec.spark.HiveReduceFunctionResultList.processNextRecord(HiveReduceFunctionResultList.java:54) > ~[hive-exec-3.0.0-SNAPSHOT.jar:3.0.0-SNAPSHOT] > at > org.apache.hadoop.hive.ql.exec.spark.HiveReduceFunctionResultList.processNextRecord(HiveReduceFunctionResultList.java:28) > ~[hive-exec-3.0.0-SNAPSHOT.jar:3.0.0-SNAPSHOT] > at > org.apache.hadoop.hive.ql.exec.spark.HiveBaseFunctionResultList.hasNext(HiveBaseFunctionResultList.java:85) > ~[hive-exec-3.0.0-SNAPSHOT.jar:3.0.0-SNAPSHOT] > at > scala.collection.convert.Wrappers$JIteratorWrapper.hasNext(Wrappers.scala:42) > ~[scala-library-2.11.8.jar:?] > at scala.collection.Iterator$class.foreach(Iterator.scala:893) > ~[scala-library-2.11.8.jar:?] > at scala.collection.AbstractIterator.foreach(Iterator.scala:1336) > ~[scala-library-2.11.8.jar:?] 
> at > org.apache.spark.rdd.AsyncRDDActions$$anonfun$foreachAsync$1$$anonfun$apply$12.apply(AsyncRDDActions.scala:127) > ~[spark-core_2.11-2.0.0.jar:2.0.0] > at > org.apache.spark.rdd.AsyncRDDActions$$anonfun$foreachAsync$1$$anonfun$apply$12.apply(AsyncRDDActions.scala:127) > ~[spark-core_2.11-2.0.0.jar:2.0.0] > at > org.apache.spark.SparkContext$$anonfun$33.apply(SparkContext.scala:1974) > ~[spark-core_2.11-2.0.0.jar:2.0.0] > at > org.apache.spark.SparkContext$$anonfun$33.apply(SparkContext.scala:1974) > ~[spark-core_2.11-2.0.0.jar:2.0.0] > at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:70) > ~[spark-core_2.11-2.0.0.jar:2.0.0] > at org.apache.spark.scheduler.Task.run(Task.scala:85) > ~[spark-core_2.11-2.0.0.jar:2.0.0] > at > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:274) > ~[spark-core_2.11-2.0.0.jar:2.0.0] > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) > [?:1.8.0_112] > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) > [?:1.8.0_112] > at java.lang.Thread.run(Thread.java:745) [?:1.8.0_112] > Caused by: java.lang.ArrayIndexOutOfBoundsException: 1 > at > org.apache.hadoop.hive.ql.exec.vector.VectorGroupKeyHelper.copyGroupKey(VectorGroupKeyHelper.java:107) > ~[hive-exec-3.0.0-SNAPSHOT.jar:3.0.0-SNAPSHOT] > at >
[jira] [Updated] (HIVE-16839) Unbalanced calls to openTransaction/commitTransaction when alter the same partition concurrently
[ https://issues.apache.org/jira/browse/HIVE-16839?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Nemon Lou updated HIVE-16839: - Description: SQL to reproduce: prepare: {noformat} hdfs dfs -mkdir -p /hzsrc/external/writing_dc/ltgsm/16e7a9b2-21a1-3f4f-8061-bc3395281627 1,create external table tb_ltgsm_external (id int) PARTITIONED by (cp string,ld string); {noformat} open one beeline run these two sql many times {noformat} 2,ALTER TABLE tb_ltgsm_external ADD IF NOT EXISTS PARTITION (cp=2017060513,ld=2017060610); 3,ALTER TABLE tb_ltgsm_external PARTITION (cp=2017060513,ld=2017060610) SET LOCATION 'hdfs://hacluster/hzsrc/external/writing_dc/ltgsm/16e7a9b2-21a1-3f4f-8061-bc3395281627'; {noformat} open another beeline to run this sql many times at the same time. {noformat} 4,ALTER TABLE tb_ltgsm_external DROP PARTITION (cp=2017060513,ld=2017060610); {noformat} MetaStore logs: {noformat} 2017-06-06 21:58:34,213 | ERROR | pool-6-thread-197 | Retrying HMSHandler after 2000 ms (attempt 1 of 10) with error: javax.jdo.JDOObjectNotFoundException: No such database row FailedObject:49[OID]org.apache.hadoop.hive.metastore.model.MStorageDescriptor at org.datanucleus.api.jdo.NucleusJDOHelper.getJDOExceptionForNucleusException(NucleusJDOHelper.java:475) at org.datanucleus.api.jdo.JDOAdapter.getApiExceptionForNucleusException(JDOAdapter.java:1158) at org.datanucleus.state.JDOStateManager.isLoaded(JDOStateManager.java:3231) at org.apache.hadoop.hive.metastore.model.MStorageDescriptor.jdoGetcd(MStorageDescriptor.java) at org.apache.hadoop.hive.metastore.model.MStorageDescriptor.getCD(MStorageDescriptor.java:184) at org.apache.hadoop.hive.metastore.ObjectStore.convertToStorageDescriptor(ObjectStore.java:1282) at org.apache.hadoop.hive.metastore.ObjectStore.convertToStorageDescriptor(ObjectStore.java:1299) at org.apache.hadoop.hive.metastore.ObjectStore.convertToPart(ObjectStore.java:1680) at 
org.apache.hadoop.hive.metastore.ObjectStore.getPartition(ObjectStore.java:1586) at sun.reflect.GeneratedMethodAccessor35.invoke(Unknown Source) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:497) at org.apache.hadoop.hive.metastore.RawStoreProxy.invoke(RawStoreProxy.java:98) at com.sun.proxy.$Proxy0.getPartition(Unknown Source) at org.apache.hadoop.hive.metastore.HiveAlterHandler.alterPartitions(HiveAlterHandler.java:538) at org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.alter_partitions(HiveMetaStore.java:3317) at sun.reflect.GeneratedMethodAccessor37.invoke(Unknown Source) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:497) at org.apache.hadoop.hive.metastore.RetryingHMSHandler.invoke(RetryingHMSHandler.java:102) at com.sun.proxy.$Proxy12.alter_partitions(Unknown Source) at org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$Processor$alter_partitions.getResult(ThriftHiveMetastore.java:9963) at org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$Processor$alter_partitions.getResult(ThriftHiveMetastore.java:9947) at org.apache.thrift.ProcessFunction.process(ProcessFunction.java:39) at org.apache.hadoop.hive.metastore.TUGIBasedProcessor$1.run(TUGIBasedProcessor.java:110) at org.apache.hadoop.hive.metastore.TUGIBasedProcessor$1.run(TUGIBasedProcessor.java:106) at java.security.AccessController.doPrivileged(Native Method) at javax.security.auth.Subject.doAs(Subject.java:422) at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1673) at org.apache.hadoop.hive.metastore.TUGIBasedProcessor.process(TUGIBasedProcessor.java:118) at org.apache.thrift.server.TThreadPoolServer$WorkerProcess.run(TThreadPoolServer.java:285) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) at java.lang.Thread.run(Thread.java:745) NestedThrowablesStackTrace: No such database row org.datanucleus.exceptions.NucleusObjectNotFoundException: No such database row at org.datanucleus.store.rdbms.request.FetchRequest.execute(FetchRequest.java:357) at org.datanucleus.store.rdbms.RDBMSPersistenceHandler.fetchObject(RDBMSPersistenceHandler.java:324) at org.datanucleus.state.AbstractStateManager.loadFieldsFromDatastore(AbstractStateManager.java:1120) at org.datanucleus.state.JDOStateManager.loadSpecifiedFields(JDOStateManager.java:2916) at org.datanucleus.state.JDOStateManager.isLoaded(JDOStateManager.java:3219) at
[jira] [Commented] (HIVE-16823) "ArrayIndexOutOfBoundsException" in spark_vectorized_dynamic_partition_pruning.q
[ https://issues.apache.org/jira/browse/HIVE-16823?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16039995#comment-16039995 ] Jianguo Tian commented on HIVE-16823: - Hi, [~mmccline]. This exception was indeed triggered by [-HIVE-16273-|https://issues.apache.org/jira/browse/HIVE-16273], if I build Hive with code before this patch, this exception won't occur. Any comments and suggestion will be appreciated. Thx! > "ArrayIndexOutOfBoundsException" in > spark_vectorized_dynamic_partition_pruning.q > > > Key: HIVE-16823 > URL: https://issues.apache.org/jira/browse/HIVE-16823 > Project: Hive > Issue Type: Bug >Reporter: Jianguo Tian > > script.q > {code} > set hive.optimize.ppd=true; > set hive.ppd.remove.duplicatefilters=true; > set hive.spark.dynamic.partition.pruning=true; > set hive.optimize.metadataonly=false; > -- set hive.optimize.index.filter=true; > set hive.vectorized.execution.enabled=true; > set hive.strict.checks.cartesian.product=false; > -- parent is reduce tasks > select count(*) from srcpart join (select ds as ds, ds as `date` from srcpart > group by ds) s on (srcpart.ds = s.ds) where s.`date` = '2008-04-08'; > {code} > The exceptions are as follows: > {code} > 2017-06-05T09:20:31,468 ERROR [Executor task launch worker-0] > spark.SparkReduceRecordHandler: Fatal error: > org.apache.hadoop.hive.ql.metadata.HiveException: Error while processing > vector batch (tag=0) Column vector types: 0:BYTES, 1:BYTES > ["2008-04-08", "2008-04-08"] > org.apache.hadoop.hive.ql.metadata.HiveException: Error while processing > vector batch (tag=0) Column vector types: 0:BYTES, 1:BYTES > ["2008-04-08", "2008-04-08"] > at > org.apache.hadoop.hive.ql.exec.spark.SparkReduceRecordHandler.processVectors(SparkReduceRecordHandler.java:413) > ~[hive-exec-3.0.0-SNAPSHOT.jar:3.0.0-SNAPSHOT] > at > org.apache.hadoop.hive.ql.exec.spark.SparkReduceRecordHandler.processRow(SparkReduceRecordHandler.java:301) > ~[hive-exec-3.0.0-SNAPSHOT.jar:3.0.0-SNAPSHOT] > at > 
org.apache.hadoop.hive.ql.exec.spark.HiveReduceFunctionResultList.processNextRecord(HiveReduceFunctionResultList.java:54) > ~[hive-exec-3.0.0-SNAPSHOT.jar:3.0.0-SNAPSHOT] > at > org.apache.hadoop.hive.ql.exec.spark.HiveReduceFunctionResultList.processNextRecord(HiveReduceFunctionResultList.java:28) > ~[hive-exec-3.0.0-SNAPSHOT.jar:3.0.0-SNAPSHOT] > at > org.apache.hadoop.hive.ql.exec.spark.HiveBaseFunctionResultList.hasNext(HiveBaseFunctionResultList.java:85) > ~[hive-exec-3.0.0-SNAPSHOT.jar:3.0.0-SNAPSHOT] > at > scala.collection.convert.Wrappers$JIteratorWrapper.hasNext(Wrappers.scala:42) > ~[scala-library-2.11.8.jar:?] > at scala.collection.Iterator$class.foreach(Iterator.scala:893) > ~[scala-library-2.11.8.jar:?] > at scala.collection.AbstractIterator.foreach(Iterator.scala:1336) > ~[scala-library-2.11.8.jar:?] > at > org.apache.spark.rdd.AsyncRDDActions$$anonfun$foreachAsync$1$$anonfun$apply$12.apply(AsyncRDDActions.scala:127) > ~[spark-core_2.11-2.0.0.jar:2.0.0] > at > org.apache.spark.rdd.AsyncRDDActions$$anonfun$foreachAsync$1$$anonfun$apply$12.apply(AsyncRDDActions.scala:127) > ~[spark-core_2.11-2.0.0.jar:2.0.0] > at > org.apache.spark.SparkContext$$anonfun$33.apply(SparkContext.scala:1974) > ~[spark-core_2.11-2.0.0.jar:2.0.0] > at > org.apache.spark.SparkContext$$anonfun$33.apply(SparkContext.scala:1974) > ~[spark-core_2.11-2.0.0.jar:2.0.0] > at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:70) > ~[spark-core_2.11-2.0.0.jar:2.0.0] > at org.apache.spark.scheduler.Task.run(Task.scala:85) > ~[spark-core_2.11-2.0.0.jar:2.0.0] > at > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:274) > ~[spark-core_2.11-2.0.0.jar:2.0.0] > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) > [?:1.8.0_112] > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) > [?:1.8.0_112] > at java.lang.Thread.run(Thread.java:745) [?:1.8.0_112] > Caused by: 
java.lang.ArrayIndexOutOfBoundsException: 1 > at > org.apache.hadoop.hive.ql.exec.vector.VectorGroupKeyHelper.copyGroupKey(VectorGroupKeyHelper.java:107) > ~[hive-exec-3.0.0-SNAPSHOT.jar:3.0.0-SNAPSHOT] > at > org.apache.hadoop.hive.ql.exec.vector.VectorGroupByOperator$ProcessingModeReduceMergePartial.doProcessBatch(VectorGroupByOperator.java:832) > ~[hive-exec-3.0.0-SNAPSHOT.jar:3.0.0-SNAPSHOT] > at > org.apache.hadoop.hive.ql.exec.vector.VectorGroupByOperator$ProcessingModeBase.processBatch(VectorGroupByOperator.java:179) > ~[hive-exec-3.0.0-SNAPSHOT.jar:3.0.0-SNAPSHOT] > at >
[jira] [Updated] (HIVE-12492) MapJoin: 4 million unique integers seems to be a probe plateau
[ https://issues.apache.org/jira/browse/HIVE-12492?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Gopal V updated HIVE-12492: --- Description: After 4 million keys, the map-join implementation seems to suffer from a performance degradation. The hashtable build & probe time makes this very inefficient, even if the data is very compact (i.e 2 ints). Falling back onto the shuffle join or bucket map-join is useful after 2^22 items. (Note: this fixes a statsutil issue - due to the extra clone() in the column stats path) was: After 4 million keys, the map-join implementation seems to suffer from a performance degradation. The hashtable build & probe time makes this very inefficient, even if the data is very compact (i.e 2 ints). Falling back onto the shuffle join or bucket map-join is useful after 2^22 items. > MapJoin: 4 million unique integers seems to be a probe plateau > -- > > Key: HIVE-12492 > URL: https://issues.apache.org/jira/browse/HIVE-12492 > Project: Hive > Issue Type: Improvement > Components: Query Planning >Affects Versions: 1.3.0, 1.2.1, 2.0.0 >Reporter: Gopal V >Assignee: Jesus Camacho Rodriguez > Labels: TODOC2.2 > Fix For: 2.2.0 > > Attachments: HIVE-12492.01.patch, HIVE-12492.02.patch, > HIVE-12492.patch > > > After 4 million keys, the map-join implementation seems to suffer from a > performance degradation. > The hashtable build & probe time makes this very inefficient, even if the > data is very compact (i.e 2 ints). > Falling back onto the shuffle join or bucket map-join is useful after 2^22 > items. > (Note: this fixes a statsutil issue - due to the extra clone() in the column > stats path) -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Commented] (HIVE-16834) Review org.apache.hadoop.hive.serde2.ByteStream
[ https://issues.apache.org/jira/browse/HIVE-16834?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16039962#comment-16039962 ] Hive QA commented on HIVE-16834: Here are the results of testing the latest attachment: https://issues.apache.org/jira/secure/attachment/12871695/HIVE-16834.1.patch {color:red}ERROR:{color} -1 due to no test(s) being added or modified. {color:red}ERROR:{color} -1 due to 4 failed/errored test(s), 10820 tests executed *Failed tests:* {noformat} org.apache.hadoop.hive.cli.TestBeeLineDriver.testCliDriver[insert_overwrite_local_directory_1] (batchId=237) org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver[orc_ppd_basic] (batchId=140) org.apache.hadoop.hive.cli.TestPerfCliDriver.testCliDriver[query14] (batchId=232) org.apache.hadoop.hive.cli.TestPerfCliDriver.testCliDriver[query78] (batchId=232) {noformat} Test results: https://builds.apache.org/job/PreCommit-HIVE-Build/5556/testReport Console output: https://builds.apache.org/job/PreCommit-HIVE-Build/5556/console Test logs: http://104.198.109.242/logs/PreCommit-HIVE-Build-5556/ Messages: {noformat} Executing org.apache.hive.ptest.execution.TestCheckPhase Executing org.apache.hive.ptest.execution.PrepPhase Executing org.apache.hive.ptest.execution.ExecutionPhase Executing org.apache.hive.ptest.execution.ReportingPhase Tests exited with: TestsFailedException: 4 tests failed {noformat} This message is automatically generated. ATTACHMENT ID: 12871695 - PreCommit-HIVE-Build > Review org.apache.hadoop.hive.serde2.ByteStream > --- > > Key: HIVE-16834 > URL: https://issues.apache.org/jira/browse/HIVE-16834 > Project: Hive > Issue Type: Improvement > Components: Serializers/Deserializers >Affects Versions: 2.1.1, 3.0.0 >Reporter: BELUGA BEHR >Priority: Trivial > Attachments: HIVE-16834.1.patch > > > Some code clean up and enhancements of > {{org.apache.hadoop.hive.serde2.ByteStream}}. -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Updated] (HIVE-16838) Improve plans for subqueries with non-equi co-related predicates
[ https://issues.apache.org/jira/browse/HIVE-16838?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Vineet Garg updated HIVE-16838: --- Attachment: HIVE-16838.1.patch cc [~ashutoshc] This is a preliminary patch which eliminates un-necessary join with outer query for non-equi correlated predicates (same as equi correlated predicates). It still has issues which I am working on. Uploading it to run tests to see failure. > Improve plans for subqueries with non-equi co-related predicates > > > Key: HIVE-16838 > URL: https://issues.apache.org/jira/browse/HIVE-16838 > Project: Hive > Issue Type: Sub-task > Components: Logical Optimizer >Reporter: Vineet Garg >Assignee: Vineet Garg > Labels: sub-query > Attachments: HIVE-16838.1.patch > > -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Updated] (HIVE-16838) Improve plans for subqueries with non-equi co-related predicates
[ https://issues.apache.org/jira/browse/HIVE-16838?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Vineet Garg updated HIVE-16838: --- Labels: sub-query (was: ) > Improve plans for subqueries with non-equi co-related predicates > > > Key: HIVE-16838 > URL: https://issues.apache.org/jira/browse/HIVE-16838 > Project: Hive > Issue Type: Sub-task > Components: Logical Optimizer >Reporter: Vineet Garg >Assignee: Vineet Garg > Labels: sub-query > -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Assigned] (HIVE-16838) Improve plans for subqueries with non-equi co-related predicates
[ https://issues.apache.org/jira/browse/HIVE-16838?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Vineet Garg reassigned HIVE-16838: -- > Improve plans for subqueries with non-equi co-related predicates > > > Key: HIVE-16838 > URL: https://issues.apache.org/jira/browse/HIVE-16838 > Project: Hive > Issue Type: Sub-task >Reporter: Vineet Garg >Assignee: Vineet Garg > -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Updated] (HIVE-16837) MetadataOnly optimizer conflicts with count distinct rewrite
[ https://issues.apache.org/jira/browse/HIVE-16837?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Pengcheng Xiong updated HIVE-16837: --- Summary: MetadataOnly optimizer conflicts with count distinct rewrite (was: improve query28 for count distinct rewrite) > MetadataOnly optimizer conflicts with count distinct rewrite > > > Key: HIVE-16837 > URL: https://issues.apache.org/jira/browse/HIVE-16837 > Project: Hive > Issue Type: Sub-task >Reporter: Pengcheng Xiong >Assignee: Pengcheng Xiong > -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Reopened] (HIVE-16837) improve query28 for count distinct rewrite
[ https://issues.apache.org/jira/browse/HIVE-16837?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Pengcheng Xiong reopened HIVE-16837: > improve query28 for count distinct rewrite > -- > > Key: HIVE-16837 > URL: https://issues.apache.org/jira/browse/HIVE-16837 > Project: Hive > Issue Type: Sub-task >Reporter: Pengcheng Xiong >Assignee: Pengcheng Xiong > -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Resolved] (HIVE-16837) improve query28 for count distinct rewrite
[ https://issues.apache.org/jira/browse/HIVE-16837?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Pengcheng Xiong resolved HIVE-16837. Resolution: Fixed > improve query28 for count distinct rewrite > -- > > Key: HIVE-16837 > URL: https://issues.apache.org/jira/browse/HIVE-16837 > Project: Hive > Issue Type: Sub-task >Reporter: Pengcheng Xiong >Assignee: Pengcheng Xiong > -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Commented] (HIVE-16667) PostgreSQL metastore handling of CLOB types for COLUMNS_V2.TYPE_NAME and other field is incorrect
[ https://issues.apache.org/jira/browse/HIVE-16667?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16039922#comment-16039922 ] Hive QA commented on HIVE-16667: Here are the results of testing the latest attachment: https://issues.apache.org/jira/secure/attachment/12871687/HIVE-16667.2.patch {color:red}ERROR:{color} -1 due to no test(s) being added or modified. {color:red}ERROR:{color} -1 due to 264 failed/errored test(s), 6622 tests executed *Failed tests:* {noformat} org.apache.hadoop.hive.cli.TestAccumuloCliDriver.testCliDriver[accumulo_index] (batchId=228) org.apache.hadoop.hive.cli.TestBeeLineDriver.testCliDriver[create_merge_compressed] (batchId=237) org.apache.hadoop.hive.cli.TestBeeLineDriver.testCliDriver[insert_overwrite_local_directory_1] (batchId=237) org.apache.hadoop.hive.cli.TestCliDriver.org.apache.hadoop.hive.cli.TestCliDriver (batchId=1) org.apache.hadoop.hive.cli.TestCliDriver.org.apache.hadoop.hive.cli.TestCliDriver (batchId=10) org.apache.hadoop.hive.cli.TestCliDriver.org.apache.hadoop.hive.cli.TestCliDriver (batchId=11) org.apache.hadoop.hive.cli.TestCliDriver.org.apache.hadoop.hive.cli.TestCliDriver (batchId=12) org.apache.hadoop.hive.cli.TestCliDriver.org.apache.hadoop.hive.cli.TestCliDriver (batchId=13) org.apache.hadoop.hive.cli.TestCliDriver.org.apache.hadoop.hive.cli.TestCliDriver (batchId=14) org.apache.hadoop.hive.cli.TestCliDriver.org.apache.hadoop.hive.cli.TestCliDriver (batchId=15) org.apache.hadoop.hive.cli.TestCliDriver.org.apache.hadoop.hive.cli.TestCliDriver (batchId=16) org.apache.hadoop.hive.cli.TestCliDriver.org.apache.hadoop.hive.cli.TestCliDriver (batchId=17) org.apache.hadoop.hive.cli.TestCliDriver.org.apache.hadoop.hive.cli.TestCliDriver (batchId=18) org.apache.hadoop.hive.cli.TestCliDriver.org.apache.hadoop.hive.cli.TestCliDriver (batchId=19) org.apache.hadoop.hive.cli.TestCliDriver.org.apache.hadoop.hive.cli.TestCliDriver (batchId=2) 
org.apache.hadoop.hive.cli.TestCliDriver.org.apache.hadoop.hive.cli.TestCliDriver (batchId=20) org.apache.hadoop.hive.cli.TestCliDriver.org.apache.hadoop.hive.cli.TestCliDriver (batchId=21) org.apache.hadoop.hive.cli.TestCliDriver.org.apache.hadoop.hive.cli.TestCliDriver (batchId=22) org.apache.hadoop.hive.cli.TestCliDriver.org.apache.hadoop.hive.cli.TestCliDriver (batchId=23) org.apache.hadoop.hive.cli.TestCliDriver.org.apache.hadoop.hive.cli.TestCliDriver (batchId=24) org.apache.hadoop.hive.cli.TestCliDriver.org.apache.hadoop.hive.cli.TestCliDriver (batchId=25) org.apache.hadoop.hive.cli.TestCliDriver.org.apache.hadoop.hive.cli.TestCliDriver (batchId=26) org.apache.hadoop.hive.cli.TestCliDriver.org.apache.hadoop.hive.cli.TestCliDriver (batchId=27) org.apache.hadoop.hive.cli.TestCliDriver.org.apache.hadoop.hive.cli.TestCliDriver (batchId=28) org.apache.hadoop.hive.cli.TestCliDriver.org.apache.hadoop.hive.cli.TestCliDriver (batchId=29) org.apache.hadoop.hive.cli.TestCliDriver.org.apache.hadoop.hive.cli.TestCliDriver (batchId=3) org.apache.hadoop.hive.cli.TestCliDriver.org.apache.hadoop.hive.cli.TestCliDriver (batchId=30) org.apache.hadoop.hive.cli.TestCliDriver.org.apache.hadoop.hive.cli.TestCliDriver (batchId=31) org.apache.hadoop.hive.cli.TestCliDriver.org.apache.hadoop.hive.cli.TestCliDriver (batchId=32) org.apache.hadoop.hive.cli.TestCliDriver.org.apache.hadoop.hive.cli.TestCliDriver (batchId=33) org.apache.hadoop.hive.cli.TestCliDriver.org.apache.hadoop.hive.cli.TestCliDriver (batchId=34) org.apache.hadoop.hive.cli.TestCliDriver.org.apache.hadoop.hive.cli.TestCliDriver (batchId=35) org.apache.hadoop.hive.cli.TestCliDriver.org.apache.hadoop.hive.cli.TestCliDriver (batchId=36) org.apache.hadoop.hive.cli.TestCliDriver.org.apache.hadoop.hive.cli.TestCliDriver (batchId=37) org.apache.hadoop.hive.cli.TestCliDriver.org.apache.hadoop.hive.cli.TestCliDriver (batchId=38) org.apache.hadoop.hive.cli.TestCliDriver.org.apache.hadoop.hive.cli.TestCliDriver (batchId=39) 
org.apache.hadoop.hive.cli.TestCliDriver.org.apache.hadoop.hive.cli.TestCliDriver (batchId=4) org.apache.hadoop.hive.cli.TestCliDriver.org.apache.hadoop.hive.cli.TestCliDriver (batchId=40) org.apache.hadoop.hive.cli.TestCliDriver.org.apache.hadoop.hive.cli.TestCliDriver (batchId=41) org.apache.hadoop.hive.cli.TestCliDriver.org.apache.hadoop.hive.cli.TestCliDriver (batchId=42) org.apache.hadoop.hive.cli.TestCliDriver.org.apache.hadoop.hive.cli.TestCliDriver (batchId=43) org.apache.hadoop.hive.cli.TestCliDriver.org.apache.hadoop.hive.cli.TestCliDriver (batchId=44) org.apache.hadoop.hive.cli.TestCliDriver.org.apache.hadoop.hive.cli.TestCliDriver (batchId=45) org.apache.hadoop.hive.cli.TestCliDriver.org.apache.hadoop.hive.cli.TestCliDriver (batchId=46) org.apache.hadoop.hive.cli.TestCliDriver.org.apache.hadoop.hive.cli.TestCliDriver (batchId=47) org.apache.hadoop.hive.cli.TestCliDriver.org.apache.hadoop.hive.cli.TestCliDriver (batchId=48)
[jira] [Commented] (HIVE-16836) improve query28 for count distinct rewrite
[ https://issues.apache.org/jira/browse/HIVE-16836?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16039902#comment-16039902 ] Pengcheng Xiong commented on HIVE-16836: [~ashutoshc], could you take a look? Thanks. > improve query28 for count distinct rewrite > -- > > Key: HIVE-16836 > URL: https://issues.apache.org/jira/browse/HIVE-16836 > Project: Hive > Issue Type: Sub-task >Reporter: Pengcheng Xiong >Assignee: Pengcheng Xiong > Attachments: HIVE-16836.01.patch > > -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Updated] (HIVE-16836) improve query28 for count distinct rewrite
[ https://issues.apache.org/jira/browse/HIVE-16836?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Pengcheng Xiong updated HIVE-16836: --- Status: Patch Available (was: Open) > improve query28 for count distinct rewrite > -- > > Key: HIVE-16836 > URL: https://issues.apache.org/jira/browse/HIVE-16836 > Project: Hive > Issue Type: Sub-task >Reporter: Pengcheng Xiong >Assignee: Pengcheng Xiong > Attachments: HIVE-16836.01.patch > > -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Updated] (HIVE-16836) improve query28 for count distinct rewrite
[ https://issues.apache.org/jira/browse/HIVE-16836?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Pengcheng Xiong updated HIVE-16836: --- Attachment: HIVE-16836.01.patch > improve query28 for count distinct rewrite > -- > > Key: HIVE-16836 > URL: https://issues.apache.org/jira/browse/HIVE-16836 > Project: Hive > Issue Type: Sub-task >Reporter: Pengcheng Xiong >Assignee: Pengcheng Xiong > Attachments: HIVE-16836.01.patch > > -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Assigned] (HIVE-16836) improve query28 for count distinct rewrite
[ https://issues.apache.org/jira/browse/HIVE-16836?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Pengcheng Xiong reassigned HIVE-16836: -- > improve query28 for count distinct rewrite > -- > > Key: HIVE-16836 > URL: https://issues.apache.org/jira/browse/HIVE-16836 > Project: Hive > Issue Type: Sub-task >Reporter: Pengcheng Xiong >Assignee: Pengcheng Xiong > -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Assigned] (HIVE-16837) improve query28 for count distinct rewrite
[ https://issues.apache.org/jira/browse/HIVE-16837?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Pengcheng Xiong reassigned HIVE-16837: -- > improve query28 for count distinct rewrite > -- > > Key: HIVE-16837 > URL: https://issues.apache.org/jira/browse/HIVE-16837 > Project: Hive > Issue Type: Sub-task >Reporter: Pengcheng Xiong >Assignee: Pengcheng Xiong > -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Commented] (HIVE-12631) LLAP: support ORC ACID tables
[ https://issues.apache.org/jira/browse/HIVE-12631?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16039900#comment-16039900 ] Teddy Choi commented on HIVE-12631: --- The 10th patch fixed data inconsistency bug and made OrcAcidEncodedDataConsumer to utilize VectorizedOrcAcidRowBatchReader. [~ekoifman], the changes in VectorizedOrcAcidRowBatchReader are minor. - Allowed ColumnVector arrays instead of VectorizedRowBatch. - Replaced BitSet objects with int arrays to reuse objects. - Removed exception raising code in memory size check. LlapIoImpl uses ColumnVectorProducer for async I/O and uses LowLevelCache for caching. I made OrcColumnVectorProducer produce ColumnVectorBatch from ORC ACID files, OrcEncodedDataReader to cache original files, and VectorizedOrcAcidRowBatchReader to cache delta files. Thank you. > LLAP: support ORC ACID tables > - > > Key: HIVE-12631 > URL: https://issues.apache.org/jira/browse/HIVE-12631 > Project: Hive > Issue Type: Bug > Components: llap, Transactions >Reporter: Sergey Shelukhin >Assignee: Teddy Choi > Attachments: HIVE-12631.10.patch, HIVE-12631.10.patch, > HIVE-12631.1.patch, HIVE-12631.2.patch, HIVE-12631.3.patch, > HIVE-12631.4.patch, HIVE-12631.5.patch, HIVE-12631.6.patch, > HIVE-12631.7.patch, HIVE-12631.8.patch, HIVE-12631.8.patch, HIVE-12631.9.patch > > > LLAP uses a completely separate read path in ORC to allow for caching and > parallelization of reads and processing. This path does not support ACID. As > far as I remember ACID logic is embedded inside ORC format; we need to > refactor it to be on top of some interface, if practical; or just port it to > LLAP read path. > Another consideration is how the logic will work with cache. The cache is > currently low-level (CB-level in ORC), so we could just use it to read bases > and deltas (deltas should be cached with higher priority) and merge as usual. > We could also cache merged representation in future. 
-- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Commented] (HIVE-16835) Addendum to HIVE-16745
[ https://issues.apache.org/jira/browse/HIVE-16835?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16039877#comment-16039877 ] Vihang Karajgaonkar commented on HIVE-16835: [~ngangam] Can you please review? > Addendum to HIVE-16745 > -- > > Key: HIVE-16835 > URL: https://issues.apache.org/jira/browse/HIVE-16835 > Project: Hive > Issue Type: Bug >Reporter: Vihang Karajgaonkar >Assignee: Vihang Karajgaonkar > Attachments: HIVE-16835.01.patch > > > HIVE-16745 missed fixing the syntax error in hive-schema-1.1.0.mysql.sql -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Updated] (HIVE-16835) Addendum to HIVE-16745
[ https://issues.apache.org/jira/browse/HIVE-16835?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Vihang Karajgaonkar updated HIVE-16835: --- Attachment: HIVE-16835.01.patch > Addendum to HIVE-16745 > -- > > Key: HIVE-16835 > URL: https://issues.apache.org/jira/browse/HIVE-16835 > Project: Hive > Issue Type: Bug >Reporter: Vihang Karajgaonkar >Assignee: Vihang Karajgaonkar > Attachments: HIVE-16835.01.patch > > > HIVE-16745 missed fixing the syntax error in hive-schema-1.1.0.mysql.sql -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Updated] (HIVE-16835) Addendum to HIVE-16745
[ https://issues.apache.org/jira/browse/HIVE-16835?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Vihang Karajgaonkar updated HIVE-16835: --- Summary: Addendum to HIVE-16745 (was: Addedum to HIVE-16745) > Addendum to HIVE-16745 > -- > > Key: HIVE-16835 > URL: https://issues.apache.org/jira/browse/HIVE-16835 > Project: Hive > Issue Type: Bug >Reporter: Vihang Karajgaonkar >Assignee: Vihang Karajgaonkar > > HIVE-16745 missed fixing the syntax error in hive-schema-1.1.0.mysql.sql -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Commented] (HIVE-16833) Review org.apache.hive.jdbc.HiveMetaDataResultSet
[ https://issues.apache.org/jira/browse/HIVE-16833?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16039871#comment-16039871 ] Hive QA commented on HIVE-16833: Here are the results of testing the latest attachment: https://issues.apache.org/jira/secure/attachment/12871670/HIVE-16833.1.patch {color:red}ERROR:{color} -1 due to no test(s) being added or modified. {color:red}ERROR:{color} -1 due to 4 failed/errored test(s), 10820 tests executed *Failed tests:* {noformat} org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver[orc_ppd_basic] (batchId=140) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[vector_if_expr] (batchId=145) org.apache.hadoop.hive.cli.TestPerfCliDriver.testCliDriver[query14] (batchId=232) org.apache.hadoop.hive.cli.TestPerfCliDriver.testCliDriver[query78] (batchId=232) {noformat} Test results: https://builds.apache.org/job/PreCommit-HIVE-Build/5553/testReport Console output: https://builds.apache.org/job/PreCommit-HIVE-Build/5553/console Test logs: http://104.198.109.242/logs/PreCommit-HIVE-Build-5553/ Messages: {noformat} Executing org.apache.hive.ptest.execution.TestCheckPhase Executing org.apache.hive.ptest.execution.PrepPhase Executing org.apache.hive.ptest.execution.ExecutionPhase Executing org.apache.hive.ptest.execution.ReportingPhase Tests exited with: TestsFailedException: 4 tests failed {noformat} This message is automatically generated. ATTACHMENT ID: 12871670 - PreCommit-HIVE-Build > Review org.apache.hive.jdbc.HiveMetaDataResultSet > - > > Key: HIVE-16833 > URL: https://issues.apache.org/jira/browse/HIVE-16833 > Project: Hive > Issue Type: Improvement >Affects Versions: 2.1.1, 3.0.0 >Reporter: BELUGA BEHR >Priority: Trivial > Attachments: HIVE-16833.1.patch > > > Reviewed and improved {{org.apache.hive.jdbc.HiveMetaDataResultSet}}. > Removed a compiler warning. -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Commented] (HIVE-16729) Improve location validator to check for blank paths.
[ https://issues.apache.org/jira/browse/HIVE-16729?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16039860#comment-16039860 ] Naveen Gangam commented on HIVE-16729: -- [~aihuaxu] Could you please review the change? Thank you > Improve location validator to check for blank paths. > > > Key: HIVE-16729 > URL: https://issues.apache.org/jira/browse/HIVE-16729 > Project: Hive > Issue Type: Sub-task > Components: Hive >Affects Versions: 3.0.0 >Reporter: Naveen Gangam >Assignee: Naveen Gangam >Priority: Minor > Attachments: HIVE-16729.2.patch, HIVE-16729.patch > > > Currently, the schema tool location validator succeeds even when the location > for hive table/partitions have paths like > hdfs://myhost.com:8020/ > hdfs://myhost.com:8020 > where there is actually no "real" path. Having the validator report such path > would be beneficial in preventing runtime errors. -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Updated] (HIVE-16729) Improve location validator to check for blank paths.
[ https://issues.apache.org/jira/browse/HIVE-16729?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Naveen Gangam updated HIVE-16729: - Status: Patch Available (was: Open) > Improve location validator to check for blank paths. > > > Key: HIVE-16729 > URL: https://issues.apache.org/jira/browse/HIVE-16729 > Project: Hive > Issue Type: Sub-task > Components: Hive >Affects Versions: 3.0.0 >Reporter: Naveen Gangam >Assignee: Naveen Gangam >Priority: Minor > Attachments: HIVE-16729.2.patch, HIVE-16729.patch > > > Currently, the schema tool location validator succeeds even when the location > for hive table/partitions have paths like > hdfs://myhost.com:8020/ > hdfs://myhost.com:8020 > where there is actually no "real" path. Having the validator report such path > would be beneficial in preventing runtime errors. -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Updated] (HIVE-16729) Improve location validator to check for blank paths.
[ https://issues.apache.org/jira/browse/HIVE-16729?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Naveen Gangam updated HIVE-16729: - Attachment: HIVE-16729.2.patch > Improve location validator to check for blank paths. > > > Key: HIVE-16729 > URL: https://issues.apache.org/jira/browse/HIVE-16729 > Project: Hive > Issue Type: Sub-task > Components: Hive >Affects Versions: 3.0.0 >Reporter: Naveen Gangam >Assignee: Naveen Gangam >Priority: Minor > Attachments: HIVE-16729.2.patch, HIVE-16729.patch > > > Currently, the schema tool location validator succeeds even when the location > for hive table/partitions have paths like > hdfs://myhost.com:8020/ > hdfs://myhost.com:8020 > where there is actually no "real" path. Having the validator report such path > would be beneficial in preventing runtime errors. -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Updated] (HIVE-16729) Improve location validator to check for blank paths.
[ https://issues.apache.org/jira/browse/HIVE-16729?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Naveen Gangam updated HIVE-16729: - Status: Open (was: Patch Available) The patch is fine. I intend to include another issue I found during testing where there are multiple spaces in a {{CREATE TABLE}} statement in postgres SQL files for a couple of tables. This causes the schema file parser to not include these tables in the table count. > Improve location validator to check for blank paths. > > > Key: HIVE-16729 > URL: https://issues.apache.org/jira/browse/HIVE-16729 > Project: Hive > Issue Type: Sub-task > Components: Hive >Affects Versions: 3.0.0 >Reporter: Naveen Gangam >Assignee: Naveen Gangam >Priority: Minor > Attachments: HIVE-16729.patch > > > Currently, the schema tool location validator succeeds even when the location > for hive table/partitions have paths like > hdfs://myhost.com:8020/ > hdfs://myhost.com:8020 > where there is actually no "real" path. Having the validator report such path > would be beneficial in preventing runtime errors. -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Updated] (HIVE-16832) duplicate ROW__ID possible in multi insert into transactional table
[ https://issues.apache.org/jira/browse/HIVE-16832?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Eugene Koifman updated HIVE-16832: -- Attachment: HIVE-16832.01.patch HIVE-16832.01.patch is an incomplete WIP. VectorizedOrcAcidRowBatchReader assumes that ROW__ID.bucketId is the same in each split (and each bucket file of a delete_delta), which is no longer the case. SortedDynPartitionOptimizer needs to ensure that data is sorted by (ROW__ID.bucketId%numBuckets) before it's sorted by ROW__ID so that FileSinkOperator.process() sees all rows for a given bucket equivalence set before moving on to the next equivalence set. > duplicate ROW__ID possible in multi insert into transactional table > --- > > Key: HIVE-16832 > URL: https://issues.apache.org/jira/browse/HIVE-16832 > Project: Hive > Issue Type: Bug > Components: Transactions >Affects Versions: 2.2.0 >Reporter: Eugene Koifman >Assignee: Eugene Koifman >Priority: Critical > Attachments: HIVE-16832.01.patch > > -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Updated] (HIVE-16832) duplicate ROW__ID possible in multi insert into transactional table
[ https://issues.apache.org/jira/browse/HIVE-16832?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Eugene Koifman updated HIVE-16832: -- Status: Patch Available (was: Open) > duplicate ROW__ID possible in multi insert into transactional table > --- > > Key: HIVE-16832 > URL: https://issues.apache.org/jira/browse/HIVE-16832 > Project: Hive > Issue Type: Bug > Components: Transactions >Affects Versions: 2.2.0 >Reporter: Eugene Koifman >Assignee: Eugene Koifman >Priority: Critical > Attachments: HIVE-16832.01.patch > > -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Updated] (HIVE-12631) LLAP: support ORC ACID tables
[ https://issues.apache.org/jira/browse/HIVE-12631?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Teddy Choi updated HIVE-12631: -- Attachment: HIVE-12631.10.patch > LLAP: support ORC ACID tables > - > > Key: HIVE-12631 > URL: https://issues.apache.org/jira/browse/HIVE-12631 > Project: Hive > Issue Type: Bug > Components: llap, Transactions >Reporter: Sergey Shelukhin >Assignee: Teddy Choi > Attachments: HIVE-12631.10.patch, HIVE-12631.10.patch, > HIVE-12631.1.patch, HIVE-12631.2.patch, HIVE-12631.3.patch, > HIVE-12631.4.patch, HIVE-12631.5.patch, HIVE-12631.6.patch, > HIVE-12631.7.patch, HIVE-12631.8.patch, HIVE-12631.8.patch, HIVE-12631.9.patch > > > LLAP uses a completely separate read path in ORC to allow for caching and > parallelization of reads and processing. This path does not support ACID. As > far as I remember ACID logic is embedded inside ORC format; we need to > refactor it to be on top of some interface, if practical; or just port it to > LLAP read path. > Another consideration is how the logic will work with cache. The cache is > currently low-level (CB-level in ORC), so we could just use it to read bases > and deltas (deltas should be cached with higher priority) and merge as usual. > We could also cache merged representation in future. -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Commented] (HIVE-16831) Add unit tests for NPE fixes in HIVE-12054
[ https://issues.apache.org/jira/browse/HIVE-16831?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16039839#comment-16039839 ] Sunitha Beeram commented on HIVE-16831: --- [~erwaman] Done. > Add unit tests for NPE fixes in HIVE-12054 > -- > > Key: HIVE-16831 > URL: https://issues.apache.org/jira/browse/HIVE-16831 > Project: Hive > Issue Type: Bug > Components: Hive >Reporter: Sunitha Beeram >Assignee: Sunitha Beeram > Attachments: HIVE-16831.1.patch, HIVE-16831.2.patch > > > HIVE-12054 fixed NPE issues related to ObjectInspector which get triggered > when an empty ORC table/partition is read. > This work adds tests that trigger that path. -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Updated] (HIVE-16831) Add unit tests for NPE fixes in HIVE-12054
[ https://issues.apache.org/jira/browse/HIVE-16831?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Sunitha Beeram updated HIVE-16831: -- Attachment: HIVE-16831.2.patch > Add unit tests for NPE fixes in HIVE-12054 > -- > > Key: HIVE-16831 > URL: https://issues.apache.org/jira/browse/HIVE-16831 > Project: Hive > Issue Type: Bug > Components: Hive >Reporter: Sunitha Beeram >Assignee: Sunitha Beeram > Attachments: HIVE-16831.1.patch, HIVE-16831.2.patch > > > HIVE-12054 fixed NPE issues related to ObjectInspector which get triggered > when an empty ORC table/partition is read. > This work adds tests that trigger that path. -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Assigned] (HIVE-15144) JSON.org license is now CatX
[ https://issues.apache.org/jira/browse/HIVE-15144?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Owen O'Malley reassigned HIVE-15144: Assignee: Owen O'Malley > JSON.org license is now CatX > > > Key: HIVE-15144 > URL: https://issues.apache.org/jira/browse/HIVE-15144 > Project: Hive > Issue Type: Bug >Reporter: Robert Kanter >Assignee: Owen O'Malley >Priority: Blocker > Fix For: 2.2.0 > > Attachments: HIVE-15144.patch, HIVE-15144.patch, HIVE-15144.patch, > HIVE-15144.patch > > > per [update resolved legal|http://www.apache.org/legal/resolved.html#json]: > {quote} > CAN APACHE PRODUCTS INCLUDE WORKS LICENSED UNDER THE JSON LICENSE? > No. As of 2016-11-03 this has been moved to the 'Category X' license list. > Prior to this, use of the JSON Java library was allowed. See Debian's page > for a list of alternatives. > {quote} > I'm not sure when this dependency was first introduced, but it looks like > it's currently used in a few places: > https://github.com/apache/hive/search?p=1=%22org.json%22=%E2%9C%93 -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Updated] (HIVE-15144) JSON.org license is now CatX
[ https://issues.apache.org/jira/browse/HIVE-15144?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Owen O'Malley updated HIVE-15144: - Attachment: HIVE-15144.patch Updated patch so that the condition list isn't quoted in the non-json case. > JSON.org license is now CatX > > > Key: HIVE-15144 > URL: https://issues.apache.org/jira/browse/HIVE-15144 > Project: Hive > Issue Type: Bug >Reporter: Robert Kanter >Priority: Blocker > Fix For: 2.2.0 > > Attachments: HIVE-15144.patch, HIVE-15144.patch, HIVE-15144.patch, > HIVE-15144.patch > > > per [update resolved legal|http://www.apache.org/legal/resolved.html#json]: > {quote} > CAN APACHE PRODUCTS INCLUDE WORKS LICENSED UNDER THE JSON LICENSE? > No. As of 2016-11-03 this has been moved to the 'Category X' license list. > Prior to this, use of the JSON Java library was allowed. See Debian's page > for a list of alternatives. > {quote} > I'm not sure when this dependency was first introduced, but it looks like > it's currently used in a few places: > https://github.com/apache/hive/search?p=1=%22org.json%22=%E2%9C%93 -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Commented] (HIVE-16826) Improvements for SeparatedValuesOutputFormat
[ https://issues.apache.org/jira/browse/HIVE-16826?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16039764#comment-16039764 ] Hive QA commented on HIVE-16826: Here are the results of testing the latest attachment: https://issues.apache.org/jira/secure/attachment/12871658/HIVE-16826.2.patch {color:red}ERROR:{color} -1 due to no test(s) being added or modified. {color:red}ERROR:{color} -1 due to 4 failed/errored test(s), 10820 tests executed *Failed tests:* {noformat} org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver[orc_ppd_basic] (batchId=140) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[vector_if_expr] (batchId=145) org.apache.hadoop.hive.cli.TestPerfCliDriver.testCliDriver[query14] (batchId=232) org.apache.hadoop.hive.cli.TestPerfCliDriver.testCliDriver[query78] (batchId=232) {noformat} Test results: https://builds.apache.org/job/PreCommit-HIVE-Build/5552/testReport Console output: https://builds.apache.org/job/PreCommit-HIVE-Build/5552/console Test logs: http://104.198.109.242/logs/PreCommit-HIVE-Build-5552/ Messages: {noformat} Executing org.apache.hive.ptest.execution.TestCheckPhase Executing org.apache.hive.ptest.execution.PrepPhase Executing org.apache.hive.ptest.execution.ExecutionPhase Executing org.apache.hive.ptest.execution.ReportingPhase Tests exited with: TestsFailedException: 4 tests failed {noformat} This message is automatically generated. ATTACHMENT ID: 12871658 - PreCommit-HIVE-Build > Improvements for SeparatedValuesOutputFormat > > > Key: HIVE-16826 > URL: https://issues.apache.org/jira/browse/HIVE-16826 > Project: Hive > Issue Type: Improvement > Components: Beeline >Affects Versions: 2.1.1, 3.0.0 >Reporter: BELUGA BEHR >Priority: Minor > Attachments: HIVE-16826.1.patch, HIVE-16826.2.patch > > > Proposing changes to class > {{org.apache.hive.beeline.SeparatedValuesOutputFormat}}. 
> # Simplify the code > # Code currently creates and destroys {{CsvListWriter}}, which contains a > buffer, for every line printed > # Use Apache Commons libraries for certain actions > # Prefer non-synchronized {{StringBuilderWriter}} to Java's synchronized > {{StringWriter}} -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Assigned] (HIVE-16835) Addendum to HIVE-16745
[ https://issues.apache.org/jira/browse/HIVE-16835?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Vihang Karajgaonkar reassigned HIVE-16835: -- > Addendum to HIVE-16745 > - > > Key: HIVE-16835 > URL: https://issues.apache.org/jira/browse/HIVE-16835 > Project: Hive > Issue Type: Bug >Reporter: Vihang Karajgaonkar >Assignee: Vihang Karajgaonkar > > HIVE-16745 missed fixing the syntax error in hive-schema-1.1.0.mysql.sql -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Updated] (HIVE-16834) Review org.apache.hadoop.hive.serde2.ByteStream
[ https://issues.apache.org/jira/browse/HIVE-16834?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] BELUGA BEHR updated HIVE-16834: --- Status: Patch Available (was: Open) > Review org.apache.hadoop.hive.serde2.ByteStream > --- > > Key: HIVE-16834 > URL: https://issues.apache.org/jira/browse/HIVE-16834 > Project: Hive > Issue Type: Improvement > Components: Serializers/Deserializers >Affects Versions: 2.1.1, 3.0.0 >Reporter: BELUGA BEHR >Priority: Trivial > Attachments: HIVE-16834.1.patch > > > Some code clean up and enhancements of > {{org.apache.hadoop.hive.serde2.ByteStream}}. -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Updated] (HIVE-16834) Review org.apache.hadoop.hive.serde2.ByteStream
[ https://issues.apache.org/jira/browse/HIVE-16834?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] BELUGA BEHR updated HIVE-16834: --- Attachment: HIVE-16834.1.patch > Review org.apache.hadoop.hive.serde2.ByteStream > --- > > Key: HIVE-16834 > URL: https://issues.apache.org/jira/browse/HIVE-16834 > Project: Hive > Issue Type: Improvement > Components: Serializers/Deserializers >Affects Versions: 2.1.1, 3.0.0 >Reporter: BELUGA BEHR >Priority: Trivial > Attachments: HIVE-16834.1.patch > > > Some code clean up and enhancements of > {{org.apache.hadoop.hive.serde2.ByteStream}}. -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Commented] (HIVE-11609) Capability to add a filter to hbase scan via composite key doesn't work
[ https://issues.apache.org/jira/browse/HIVE-11609?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16039682#comment-16039682 ] Yongzhi Chen commented on HIVE-11609: - [~ashutoshc], I am not very familiar with that part of code, could you help to review it? Thanks > Capability to add a filter to hbase scan via composite key doesn't work > --- > > Key: HIVE-11609 > URL: https://issues.apache.org/jira/browse/HIVE-11609 > Project: Hive > Issue Type: Bug > Components: HBase Handler >Reporter: Swarnim Kulkarni >Assignee: Barna Zsombor Klara > Attachments: HIVE-11609.08.patch, HIVE-11609.09.patch, > HIVE-11609.1.patch.txt, HIVE-11609.2.patch.txt, HIVE-11609.3.patch.txt, > HIVE-11609.4.patch.txt, HIVE-11609.5.patch, HIVE-11609.6.patch.txt, > HIVE-11609.7.patch.txt > > > It seems like the capability to add filter to an hbase scan which was added > as part of HIVE-6411 doesn't work. This is primarily because in the > HiveHBaseInputFormat, the filter is added in the getsplits instead of > getrecordreader. This works fine for start and stop keys but not for filter > because a filter is respected only when an actual scan is performed. This is > also related to the initial refactoring that was done as part of HIVE-3420. -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Commented] (HIVE-11609) Capability to add a filter to hbase scan via composite key doesn't work
[ https://issues.apache.org/jira/browse/HIVE-11609?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16039673#comment-16039673 ] Yongzhi Chen commented on HIVE-11609: - [~zsombor.klara], I am not very comfortable with the change related to handling existing filters. The filter obtained from the property TableScanDesc.FILTER_OBJECT_CONF_STR should include all the needed filter information for the table; including the existing filters in the scan object may be redundant or may cause issues (for example, the passed-in scan object may contain out-of-date filters)? > Capability to add a filter to hbase scan via composite key doesn't work > --- > > Key: HIVE-11609 > URL: https://issues.apache.org/jira/browse/HIVE-11609 > Project: Hive > Issue Type: Bug > Components: HBase Handler >Reporter: Swarnim Kulkarni >Assignee: Barna Zsombor Klara > Attachments: HIVE-11609.08.patch, HIVE-11609.09.patch, > HIVE-11609.1.patch.txt, HIVE-11609.2.patch.txt, HIVE-11609.3.patch.txt, > HIVE-11609.4.patch.txt, HIVE-11609.5.patch, HIVE-11609.6.patch.txt, > HIVE-11609.7.patch.txt > > > It seems like the capability to add filter to an hbase scan which was added > as part of HIVE-6411 doesn't work. This is primarily because in the > HiveHBaseInputFormat, the filter is added in the getsplits instead of > getrecordreader. This works fine for start and stop keys but not for filter > because a filter is respected only when an actual scan is performed. This is > also related to the initial refactoring that was done as part of HIVE-3420. -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Commented] (HIVE-15212) merge branch into master
[ https://issues.apache.org/jira/browse/HIVE-15212?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16039667#comment-16039667 ] Hive QA commented on HIVE-15212: Here are the results of testing the latest attachment: https://issues.apache.org/jira/secure/attachment/12871662/HIVE-15212.07.patch {color:green}SUCCESS:{color} +1 due to 20 test(s) being added or modified. {color:red}ERROR:{color} -1 due to 10 failed/errored test(s), 10832 tests executed *Failed tests:* {noformat} org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[mm_all] (batchId=64) org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[mm_conversions] (batchId=71) org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver[orc_ppd_basic] (batchId=140) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[mm_conversions] (batchId=158) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[vector_if_expr] (batchId=145) org.apache.hadoop.hive.cli.TestMiniTezCliDriver.testCliDriver[explainanalyze_2] (batchId=99) org.apache.hadoop.hive.cli.TestPerfCliDriver.testCliDriver[query14] (batchId=232) org.apache.hadoop.hive.cli.TestPerfCliDriver.testCliDriver[query78] (batchId=232) org.apache.hadoop.hive.llap.security.TestLlapSignerImpl.testSigning (batchId=289) org.apache.hive.jdbc.TestMultiSessionsHS2WithLocalClusterSpark.testSparkQuery (batchId=226) {noformat} Test results: https://builds.apache.org/job/PreCommit-HIVE-Build/5551/testReport Console output: https://builds.apache.org/job/PreCommit-HIVE-Build/5551/console Test logs: http://104.198.109.242/logs/PreCommit-HIVE-Build-5551/ Messages: {noformat} Executing org.apache.hive.ptest.execution.TestCheckPhase Executing org.apache.hive.ptest.execution.PrepPhase Executing org.apache.hive.ptest.execution.ExecutionPhase Executing org.apache.hive.ptest.execution.ReportingPhase Tests exited with: TestsFailedException: 10 tests failed {noformat} This message is automatically generated. 
ATTACHMENT ID: 12871662 - PreCommit-HIVE-Build > merge branch into master > > > Key: HIVE-15212 > URL: https://issues.apache.org/jira/browse/HIVE-15212 > Project: Hive > Issue Type: Sub-task >Reporter: Sergey Shelukhin >Assignee: Wei Zheng > Attachments: HIVE-15212.01.patch, HIVE-15212.02.patch, > HIVE-15212.03.patch, HIVE-15212.04.patch, HIVE-15212.05.patch, > HIVE-15212.06.patch, HIVE-15212.07.patch > > -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Updated] (HIVE-16832) duplicate ROW__ID possible in multi insert into transactional table
[ https://issues.apache.org/jira/browse/HIVE-16832?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Eugene Koifman updated HIVE-16832: -- Summary: duplicate ROW__ID possible in multi insert into transactional table (was: duplicate ROW__ID) > duplicate ROW__ID possible in multi insert into transactional table > --- > > Key: HIVE-16832 > URL: https://issues.apache.org/jira/browse/HIVE-16832 > Project: Hive > Issue Type: Bug > Components: Transactions >Affects Versions: 2.2.0 >Reporter: Eugene Koifman >Assignee: Eugene Koifman >Priority: Critical > -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Updated] (HIVE-16832) duplicate ROW__ID
[ https://issues.apache.org/jira/browse/HIVE-16832?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Eugene Koifman updated HIVE-16832: -- Summary: duplicate ROW__ID (was: duplicate ROW__ID with split update) > duplicate ROW__ID > - > > Key: HIVE-16832 > URL: https://issues.apache.org/jira/browse/HIVE-16832 > Project: Hive > Issue Type: Bug > Components: Transactions >Affects Versions: 2.2.0 >Reporter: Eugene Koifman >Assignee: Eugene Koifman >Priority: Critical > -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Commented] (HIVE-16831) Add unit tests for NPE fixes in HIVE-12054
[ https://issues.apache.org/jira/browse/HIVE-16831?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16039627#comment-16039627 ] Anthony Hsu commented on HIVE-16831: [~sbeeram]: I suggest putting all the tests in one qfile vs. separate files. > Add unit tests for NPE fixes in HIVE-12054 > -- > > Key: HIVE-16831 > URL: https://issues.apache.org/jira/browse/HIVE-16831 > Project: Hive > Issue Type: Bug > Components: Hive >Reporter: Sunitha Beeram >Assignee: Sunitha Beeram > Attachments: HIVE-16831.1.patch > > > HIVE-12054 fixed NPE issues related to ObjectInspector which get triggered > when an empty ORC table/partition is read. > This work adds tests that trigger that path. -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Updated] (HIVE-16667) PostgreSQL metastore handling of CLOB types for COLUMNS_V2.TYPE_NAME and other field is incorrect
[ https://issues.apache.org/jira/browse/HIVE-16667?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Naveen Gangam updated HIVE-16667: - Attachment: HIVE-16667.2.patch > PostgreSQL metastore handling of CLOB types for COLUMNS_V2.TYPE_NAME and > other field is incorrect > - > > Key: HIVE-16667 > URL: https://issues.apache.org/jira/browse/HIVE-16667 > Project: Hive > Issue Type: Bug >Reporter: Remus Rusanu >Assignee: Naveen Gangam > Attachments: HIVE-16667.2.patch, HIVE-16667.patch, HiveCLIOutput.txt, > PostgresDBOutput.txt > > > The CLOB JDO type introduced with HIVE-12274 does not work correctly with > PostgreSQL. The value is written out-of-band and the LOB handle is written, as > an INT, into the table. SELECTs return the INT value, which should have been > read via the {{lo_get}} PG built-in, and then cast into string. > Furthermore, the behavior is different between fields upgraded from earlier > metastore versions (they retain their string storage) vs. values inserted > after the upgrade (inserted as LOB roots). > The code in > {{MetaStoreDirectSql.getPartitionsFromPartitionIds/extractSqlClob}} expects > the underlying JDO/Datanucleus to map the column to a {{Clob}} but that does > not happen; the value is a Java String containing the int which is the LOB > root saved by PG. > This manifests at runtime with errors like: > {code} > hive> select * from srcpart; > Failed with exception java.io.IOException:java.lang.IllegalArgumentException: > Error: type expected at the position 0 of '24030:24031' but '24030' is found. > {code} > the 24030:24031 should be 'string:string'. 
> repro: > {code} > CREATE TABLE srcpart (key STRING COMMENT 'default', value STRING COMMENT > 'default') PARTITIONED BY (ds STRING, hr STRING) STORED AS TEXTFILE; > LOAD DATA LOCAL INPATH "${hiveconf:test.data.dir}/kv1.txt" OVERWRITE INTO > TABLE srcpart PARTITION (ds="2008-04-09", hr="11"); > select * from srcpart; > {code} > I did not see the issue being hit by non-partitioned/textfile tables, but > that is just the luck of the path taken by the code. Inspection of my PG > metastore shows all the CLOB fields suffering from this issue. -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Updated] (HIVE-16667) PostgreSQL metastore handling of CLOB types for COLUMNS_V2.TYPE_NAME and other field is incorrect
[ https://issues.apache.org/jira/browse/HIVE-16667?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Naveen Gangam updated HIVE-16667: - Status: Patch Available (was: Open) > PostgreSQL metastore handling of CLOB types for COLUMNS_V2.TYPE_NAME and > other field is incorrect > - > > Key: HIVE-16667 > URL: https://issues.apache.org/jira/browse/HIVE-16667 > Project: Hive > Issue Type: Bug >Reporter: Remus Rusanu >Assignee: Naveen Gangam > Attachments: HIVE-16667.2.patch, HIVE-16667.patch, HiveCLIOutput.txt, > PostgresDBOutput.txt > > > The CLOB JDO type introduced with HIVE-12274 does not work correctly with > PostgreSQL. The value is written out-of-band and the LOB handle is written, as > an INT, into the table. SELECTs return the INT value, which should have been > read via the {{lo_get}} PG built-in, and then cast into string. > Furthermore, the behavior is different between fields upgraded from earlier > metastore versions (they retain their string storage) vs. values inserted > after the upgrade (inserted as LOB roots). > The code in > {{MetaStoreDirectSql.getPartitionsFromPartitionIds/extractSqlClob}} expects > the underlying JDO/Datanucleus to map the column to a {{Clob}} but that does > not happen; the value is a Java String containing the int which is the LOB > root saved by PG. > This manifests at runtime with errors like: > {code} > hive> select * from srcpart; > Failed with exception java.io.IOException:java.lang.IllegalArgumentException: > Error: type expected at the position 0 of '24030:24031' but '24030' is found. > {code} > the 24030:24031 should be 'string:string'. 
> repro: > {code} > CREATE TABLE srcpart (key STRING COMMENT 'default', value STRING COMMENT > 'default') PARTITIONED BY (ds STRING, hr STRING) STORED AS TEXTFILE; > LOAD DATA LOCAL INPATH "${hiveconf:test.data.dir}/kv1.txt" OVERWRITE INTO > TABLE srcpart PARTITION (ds="2008-04-09", hr="11"); > select * from srcpart; > {code} > I did not see the issue being hit by non-partitioned/textfile tables, but > that is just the luck of the path taken by the code. Inspection of my PG > metastore shows all the CLOB fields suffering from this issue. -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Updated] (HIVE-16667) PostgreSQL metastore handling of CLOB types for COLUMNS_V2.TYPE_NAME and other field is incorrect
[ https://issues.apache.org/jira/browse/HIVE-16667?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Naveen Gangam updated HIVE-16667: - Status: Open (was: Patch Available) > PostgreSQL metastore handling of CLOB types for COLUMNS_V2.TYPE_NAME and > other field is incorrect > - > > Key: HIVE-16667 > URL: https://issues.apache.org/jira/browse/HIVE-16667 > Project: Hive > Issue Type: Bug >Reporter: Remus Rusanu >Assignee: Naveen Gangam > Attachments: HIVE-16667.patch, HiveCLIOutput.txt, PostgresDBOutput.txt > > > The CLOB JDO type introduced with HIVE-12274 does not work correctly with > PostgreSQL. The value is written out-of-band and the LOB handle is written, as > an INT, into the table. SELECTs return the INT value, which should have been > read via the {{lo_get}} PG built-in, and then cast into string. > Furthermore, the behavior is different between fields upgraded from earlier > metastore versions (they retain their string storage) vs. values inserted > after the upgrade (inserted as LOB roots). > The code in > {{MetaStoreDirectSql.getPartitionsFromPartitionIds/extractSqlClob}} expects > the underlying JDO/Datanucleus to map the column to a {{Clob}} but that does > not happen; the value is a Java String containing the int which is the LOB > root saved by PG. > This manifests at runtime with errors like: > {code} > hive> select * from srcpart; > Failed with exception java.io.IOException:java.lang.IllegalArgumentException: > Error: type expected at the position 0 of '24030:24031' but '24030' is found. > {code} > the 24030:24031 should be 'string:string'. 
> repro: > {code} > CREATE TABLE srcpart (key STRING COMMENT 'default', value STRING COMMENT > 'default') PARTITIONED BY (ds STRING, hr STRING) STORED AS TEXTFILE; > LOAD DATA LOCAL INPATH "${hiveconf:test.data.dir}/kv1.txt" OVERWRITE INTO > TABLE srcpart PARTITION (ds="2008-04-09", hr="11"); > select * from srcpart; > {code} > I did not see the issue being hit by non-partitioned/textfile tables, but > that is just the luck of the path taken by the code. Inspection of my PG > metastore shows all the CLOB fields suffering from this issue. -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Commented] (HIVE-16667) PostgreSQL metastore handling of CLOB types for COLUMNS_V2.TYPE_NAME and other field is incorrect
[ https://issues.apache.org/jira/browse/HIVE-16667?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16039623#comment-16039623 ] Naveen Gangam commented on HIVE-16667: -- [~rusanu] I have had a chance to test with VARCHAR in the JDO mappings file with Oracle, Postgres and Derby. Seems to be working without any issues. I am uploading the patch with the changes. Could you please review it when you get a chance? Thanks {code} 2017-06-06T15:50:41,716 DEBUG [pool-7-thread-4] metastore.MetaStoreDirectSql: Direct SQL query in 280.51028ms + 0.084425ms, the query is [select "PARTITIONS"."PART_ID" from "PARTITIONS" inner join "TBLS" on "PARTITIONS"."TBL_ID" = "TBLS"."TBL_ID" and "TBLS"."TBL_NAME" = ? inner join "DBS" on "TBLS"."DB_ID" = "DBS"."DB_ID" and "DBS"."NAME" = ? ] 2017-06-06T15:50:41,858 DEBUG [pool-7-thread-4] metastore.MetaStoreDirectSql: Direct SQL query in 141.674264ms + 0.379869ms, the query is [select "PARTITIONS"."PART_ID", "SDS"."SD_ID", "SDS"."CD_ID", "SERDES"."SERDE_ID", "PARTITIONS"."CREATE_TIME", "PARTITIONS"."LAST_ACCESS_TIME", "SDS"."INPUT_FORMAT", "SDS"."IS_COMPRESSED", "SDS"."IS_STOREDASSUBDIRECTORIES", "SDS"."LOCATION", "SDS"."NUM_BUCKETS", "SDS"."OUTPUT_FORMAT", "SERDES"."NAME", "SERDES"."SLIB" from "PARTITIONS" left outer join "SDS" on "PARTITIONS"."SD_ID" = "SDS"."SD_ID" left outer join "SERDES" on "SDS"."SERDE_ID" = "SERDES"."SERDE_ID" where "PART_ID" in (1) order by "PART_NAME" asc] 2017-06-06T15:50:42,002 DEBUG [pool-7-thread-4] metastore.MetaStoreDirectSql: Direct SQL query in 142.592725ms + 0.294649ms, the query is [select "PART_ID", "PARAM_KEY", "PARAM_VALUE" from "PARTITION_PARAMS" where "PART_ID" in (1) and "PARAM_KEY" is not null order by "PART_ID" asc] 2017-06-06T15:50:42,142 DEBUG [pool-7-thread-4] metastore.MetaStoreDirectSql: Direct SQL query in 139.308419ms + 0.229592ms, the query is [select "PART_ID", "PART_KEY_VAL" from "PARTITION_KEY_VALS" where "PART_ID" in (1) and "INTEGER_IDX" >= 0 order by "PART_ID" asc, 
"INTEGER_IDX" asc] 2017-06-06T15:50:42,281 DEBUG [pool-7-thread-4] metastore.MetaStoreDirectSql: Direct SQL query in 139.136081ms + 0.092378ms, the query is [select "SD_ID", "PARAM_KEY", "PARAM_VALUE" from "SD_PARAMS" where "SD_ID" in (2) and "PARAM_KEY" is not null order by "SD_ID" asc] 2017-06-06T15:50:42,423 DEBUG [pool-7-thread-4] metastore.MetaStoreDirectSql: Direct SQL query in 141.601391ms + 0.08076ms, the query is [select "SD_ID", "COLUMN_NAME", "SORT_COLS"."ORDER" from "SORT_COLS" where "SD_ID" in (2) and "INTEGER_IDX" >= 0 order by "SD_ID" asc, "INTEGER_IDX" asc] 2017-06-06T15:50:42,564 DEBUG [pool-7-thread-4] metastore.MetaStoreDirectSql: Direct SQL query in 140.216113ms + 0.114448ms, the query is [select "SD_ID", "BUCKET_COL_NAME" from "BUCKETING_COLS" where "SD_ID" in (2) and "INTEGER_IDX" >= 0 order by "SD_ID" asc, "INTEGER_IDX" asc] 2017-06-06T15:50:42,703 DEBUG [pool-7-thread-4] metastore.MetaStoreDirectSql: Direct SQL query in 139.086542ms + 0.094296ms, the query is [select "SD_ID", "SKEWED_COL_NAME" from "SKEWED_COL_NAMES" where "SD_ID" in (2) and "INTEGER_IDX" >= 0 order by "SD_ID" asc, "INTEGER_IDX" asc] 2017-06-06T15:50:42,847 DEBUG [pool-7-thread-4] metastore.MetaStoreDirectSql: Direct SQL query in 142.786353ms + 0.433393ms, the query is [select "CD_ID", "COMMENT", "COLUMN_NAME", "TYPE_NAME" from "COLUMNS_V2" where "CD_ID" in (1) and "INTEGER_IDX" >= 0 order by "CD_ID" asc, "INTEGER_IDX" asc] 2017-06-06T15:50:42,990 DEBUG [pool-7-thread-4] metastore.MetaStoreDirectSql: Direct SQL query in 142.115654m {code} > PostgreSQL metastore handling of CLOB types for COLUMNS_V2.TYPE_NAME and > other field is incorrect > - > > Key: HIVE-16667 > URL: https://issues.apache.org/jira/browse/HIVE-16667 > Project: Hive > Issue Type: Bug >Reporter: Remus Rusanu >Assignee: Naveen Gangam > Attachments: HIVE-16667.patch, HiveCLIOutput.txt, PostgresDBOutput.txt > > > The CLOB JDO type introduced with HIVE-12274 does not work correctly with > PostgreSQL. 
The value is written out-of-band and the LOB handle is written, as > an INT, into the table. SELECTs return the INT value, which should have been > read via the {{lo_get}} PG built-in, and then cast into string. > Furthermore, the behavior is different between fields upgraded from earlier > metastore versions (they retain their string storage) vs. values inserted > after the upgrade (inserted as LOB roots). > The code in > {{MetaStoreDirectSql.getPartitionsFromPartitionIds/extractSqlClob}} expects > the underlying JDO/Datanucleus to map the column to a {{Clob}} but that does > not happen; the value is a Java String containing the int which is the LOB > root saved by PG. > This manifests at runtime with errors like:
[jira] [Commented] (HIVE-16804) Semijoin hint : Needs support for target table.
[ https://issues.apache.org/jira/browse/HIVE-16804?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16039595#comment-16039595 ] Jason Dere commented on HIVE-16804: --- +1 > Semijoin hint : Needs support for target table. > --- > > Key: HIVE-16804 > URL: https://issues.apache.org/jira/browse/HIVE-16804 > Project: Hive > Issue Type: Bug >Reporter: Deepak Jaiswal >Assignee: Deepak Jaiswal > Attachments: HIVE-16804.1.patch, HIVE-16804.2.patch, > HIVE-16804.3.patch > > > Currently the semijoin hint takes source table input. However, to provide > better control, also provide the target table name in hint. -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Commented] (HIVE-16831) Add unit tests for NPE fixes in HIVE-12054
[ https://issues.apache.org/jira/browse/HIVE-16831?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16039553#comment-16039553 ] Hive QA commented on HIVE-16831: Here are the results of testing the latest attachment: https://issues.apache.org/jira/secure/attachment/12871651/HIVE-16831.1.patch {color:green}SUCCESS:{color} +1 due to 4 test(s) being added or modified. {color:red}ERROR:{color} -1 due to 6 failed/errored test(s), 10824 tests executed *Failed tests:* {noformat} org.apache.hadoop.hive.cli.TestBeeLineDriver.testCliDriver[materialized_view_create_rewrite] (batchId=237) org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver[orc_ppd_basic] (batchId=140) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[vector_if_expr] (batchId=145) org.apache.hadoop.hive.cli.TestPerfCliDriver.testCliDriver[query14] (batchId=232) org.apache.hadoop.hive.cli.TestPerfCliDriver.testCliDriver[query23] (batchId=232) org.apache.hadoop.hive.cli.TestPerfCliDriver.testCliDriver[query78] (batchId=232) {noformat} Test results: https://builds.apache.org/job/PreCommit-HIVE-Build/5550/testReport Console output: https://builds.apache.org/job/PreCommit-HIVE-Build/5550/console Test logs: http://104.198.109.242/logs/PreCommit-HIVE-Build-5550/ Messages: {noformat} Executing org.apache.hive.ptest.execution.TestCheckPhase Executing org.apache.hive.ptest.execution.PrepPhase Executing org.apache.hive.ptest.execution.ExecutionPhase Executing org.apache.hive.ptest.execution.ReportingPhase Tests exited with: TestsFailedException: 6 tests failed {noformat} This message is automatically generated. 
ATTACHMENT ID: 12871651 - PreCommit-HIVE-Build > Add unit tests for NPE fixes in HIVE-12054 > -- > > Key: HIVE-16831 > URL: https://issues.apache.org/jira/browse/HIVE-16831 > Project: Hive > Issue Type: Bug > Components: Hive >Reporter: Sunitha Beeram >Assignee: Sunitha Beeram > Attachments: HIVE-16831.1.patch > > > HIVE-12054 fixed NPE issues related to ObjectInspector which get triggered > when an empty ORC table/partition is read. > This work adds tests that trigger that path. -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Updated] (HIVE-16804) Semijoin hint : Needs support for target table.
[ https://issues.apache.org/jira/browse/HIVE-16804?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Deepak Jaiswal updated HIVE-16804: -- Attachment: HIVE-16804.3.patch Fixed an issue in test found by Jason. > Semijoin hint : Needs support for target table. > --- > > Key: HIVE-16804 > URL: https://issues.apache.org/jira/browse/HIVE-16804 > Project: Hive > Issue Type: Bug >Reporter: Deepak Jaiswal >Assignee: Deepak Jaiswal > Attachments: HIVE-16804.1.patch, HIVE-16804.2.patch, > HIVE-16804.3.patch > > > Currently the semijoin hint takes source table input. However, to provide > better control, also provide the target table name in hint. -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Updated] (HIVE-16791) Tez engine giving inaccurate results on SMB Map joins while map-join and shuffle join gets correct results
[ https://issues.apache.org/jira/browse/HIVE-16791?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Saumil Mayani updated HIVE-16791: - Attachment: sample-data-query.txt > Tez engine giving inaccurate results on SMB Map joins while map-join and > shuffle join gets correct results > -- > > Key: HIVE-16791 > URL: https://issues.apache.org/jira/browse/HIVE-16791 > Project: Hive > Issue Type: Bug > Components: Hive, HiveServer2 >Reporter: Saumil Mayani >Assignee: Deepak Jaiswal > Attachments: sample-data-query.txt, sample-data.tar.gz-aa, > sample-data.tar.gz-ab, sample-data.tar.gz-ac, sample-data.tar.gz-ad > > > SMB Join gives incorrect results. > {code} > SMB-Join > set hive.execution.engine=tez; > set hive.enforce.sortmergebucketmapjoin=false; > set hive.optimize.bucketmapjoin=true; > set hive.optimize.bucketmapjoin.sortedmerge=true; > set hive.auto.convert.sortmerge.join=true; > set hive.auto.convert.join=true; > set hive.auto.convert.join.noconditionaltask.size=50; > OK > 2016 1 11999639 > 2016 2 18955110 > 2017 2 22217437 > Time taken: 92.647 seconds, Fetched: 3 row(s) > {code} > {code} > MAP-JOIN > set hive.execution.engine=tez; > set hive.enforce.sortmergebucketmapjoin=false; > set hive.optimize.bucketmapjoin=true; > set hive.optimize.bucketmapjoin.sortedmerge=true; > set hive.auto.convert.sortmerge.join=true; > set hive.auto.convert.join=true; > set hive.auto.convert.join.noconditionaltask.size=5000; > OK > 2016 1 26586093 > 2016 2 17724062 > 2017 2 8862031 > Time taken: 17.49 seconds, Fetched: 3 row(s) > {code} > {code} > Shuffle Join > set hive.execution.engine=tez; > set hive.enforce.sortmergebucketmapjoin=false; > set hive.optimize.bucketmapjoin=true; > set hive.optimize.bucketmapjoin.sortedmerge=true; > set hive.auto.convert.sortmerge.join=false; > set hive.auto.convert.join=false; > set hive.auto.convert.join.noconditionaltask.size=5000; > OK > 2016 1 26586093 > 2016 2 17724062 > 2017 2 8862031 > Time taken: 38.575 seconds, Fetched: 3 
row(s) > {code} -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Updated] (HIVE-16791) Tez engine giving inaccurate results on SMB Map joins while map-join and shuffle join gets correct results
[ https://issues.apache.org/jira/browse/HIVE-16791?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Saumil Mayani updated HIVE-16791: - Attachment: (was: sample-data-query.txt) > Tez engine giving inaccurate results on SMB Map joins while map-join and > shuffle join gets correct results > -- > > Key: HIVE-16791 > URL: https://issues.apache.org/jira/browse/HIVE-16791 > Project: Hive > Issue Type: Bug > Components: Hive, HiveServer2 >Reporter: Saumil Mayani >Assignee: Deepak Jaiswal > Attachments: sample-data-query.txt, sample-data.tar.gz-aa, > sample-data.tar.gz-ab, sample-data.tar.gz-ac, sample-data.tar.gz-ad > > > SMB Join gives incorrect results. > {code} > SMB-Join > set hive.execution.engine=tez; > set hive.enforce.sortmergebucketmapjoin=false; > set hive.optimize.bucketmapjoin=true; > set hive.optimize.bucketmapjoin.sortedmerge=true; > set hive.auto.convert.sortmerge.join=true; > set hive.auto.convert.join=true; > set hive.auto.convert.join.noconditionaltask.size=50; > OK > 2016 1 11999639 > 2016 2 18955110 > 2017 2 22217437 > Time taken: 92.647 seconds, Fetched: 3 row(s) > {code} > {code} > MAP-JOIN > set hive.execution.engine=tez; > set hive.enforce.sortmergebucketmapjoin=false; > set hive.optimize.bucketmapjoin=true; > set hive.optimize.bucketmapjoin.sortedmerge=true; > set hive.auto.convert.sortmerge.join=true; > set hive.auto.convert.join=true; > set hive.auto.convert.join.noconditionaltask.size=5000; > OK > 2016 1 26586093 > 2016 2 17724062 > 2017 2 8862031 > Time taken: 17.49 seconds, Fetched: 3 row(s) > {code} > {code} > Shuffle Join > set hive.execution.engine=tez; > set hive.enforce.sortmergebucketmapjoin=false; > set hive.optimize.bucketmapjoin=true; > set hive.optimize.bucketmapjoin.sortedmerge=true; > set hive.auto.convert.sortmerge.join=false; > set hive.auto.convert.join=false; > set hive.auto.convert.join.noconditionaltask.size=5000; > OK > 2016 1 26586093 > 2016 2 17724062 > 2017 2 8862031 > Time taken: 38.575 seconds, 
Fetched: 3 row(s) > {code} -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Commented] (HIVE-16323) HS2 JDOPersistenceManagerFactory.pmCache leaks after HIVE-14204
[ https://issues.apache.org/jira/browse/HIVE-16323?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16039489#comment-16039489 ] Hive QA commented on HIVE-16323: Here are the results of testing the latest attachment: https://issues.apache.org/jira/secure/attachment/12871649/HIVE-16323.4.patch {color:red}ERROR:{color} -1 due to no test(s) being added or modified. {color:red}ERROR:{color} -1 due to 5 failed/errored test(s), 10820 tests executed *Failed tests:* {noformat} org.apache.hadoop.hive.cli.TestCliDriver.testCliDriver[auto_sortmerge_join_2] (batchId=46) org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver[orc_ppd_basic] (batchId=140) org.apache.hadoop.hive.cli.TestPerfCliDriver.testCliDriver[query14] (batchId=232) org.apache.hadoop.hive.cli.TestPerfCliDriver.testCliDriver[query23] (batchId=232) org.apache.hadoop.hive.cli.TestPerfCliDriver.testCliDriver[query78] (batchId=232) {noformat} Test results: https://builds.apache.org/job/PreCommit-HIVE-Build/5549/testReport Console output: https://builds.apache.org/job/PreCommit-HIVE-Build/5549/console Test logs: http://104.198.109.242/logs/PreCommit-HIVE-Build-5549/ Messages: {noformat} Executing org.apache.hive.ptest.execution.TestCheckPhase Executing org.apache.hive.ptest.execution.PrepPhase Executing org.apache.hive.ptest.execution.ExecutionPhase Executing org.apache.hive.ptest.execution.ReportingPhase Tests exited with: TestsFailedException: 5 tests failed {noformat} This message is automatically generated. 
ATTACHMENT ID: 12871649 - PreCommit-HIVE-Build > HS2 JDOPersistenceManagerFactory.pmCache leaks after HIVE-14204 > --- > > Key: HIVE-16323 > URL: https://issues.apache.org/jira/browse/HIVE-16323 > Project: Hive > Issue Type: Bug > Components: HiveServer2 >Reporter: Daniel Dai >Assignee: Daniel Dai > Attachments: HIVE-16323.1.patch, HIVE-16323.2.patch, > HIVE-16323.3.patch, HIVE-16323.4.patch, PM_leak.png > > > Hive.loadDynamicPartitions creates threads with new embedded rawstore, but > never close them, thus we leak PersistenceManager one per such thread. -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Updated] (HIVE-16833) Review org.apache.hive.jdbc.HiveMetaDataResultSet
[ https://issues.apache.org/jira/browse/HIVE-16833?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] BELUGA BEHR updated HIVE-16833: --- Status: Patch Available (was: Open) > Review org.apache.hive.jdbc.HiveMetaDataResultSet > - > > Key: HIVE-16833 > URL: https://issues.apache.org/jira/browse/HIVE-16833 > Project: Hive > Issue Type: Improvement >Affects Versions: 2.1.1, 3.0.0 >Reporter: BELUGA BEHR >Priority: Trivial > Attachments: HIVE-16833.1.patch > > > Reviewed and improved {{org.apache.hive.jdbc.HiveMetaDataResultSet}}. > Removed a compiler warning. -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Updated] (HIVE-16833) Review org.apache.hive.jdbc.HiveMetaDataResultSet
[ https://issues.apache.org/jira/browse/HIVE-16833?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] BELUGA BEHR updated HIVE-16833: --- Attachment: HIVE-16833.1.patch > Review org.apache.hive.jdbc.HiveMetaDataResultSet > - > > Key: HIVE-16833 > URL: https://issues.apache.org/jira/browse/HIVE-16833 > Project: Hive > Issue Type: Improvement >Affects Versions: 2.1.1, 3.0.0 >Reporter: BELUGA BEHR >Priority: Trivial > Attachments: HIVE-16833.1.patch > > > Reviewed and improved {{org.apache.hive.jdbc.HiveMetaDataResultSet}}. > Removed a compiler warning. -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Commented] (HIVE-11538) Add an option to skip init script while running tests
[ https://issues.apache.org/jira/browse/HIVE-11538?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16039476#comment-16039476 ] Zoltan Haindrich commented on HIVE-11538: - [~ashutoshc] [~wzheng] oh...it seems like this has been changed by that upgrade - because the initScript setting location is now inside the java code; the maven part defaults to setting it to an empty string (because it has to be passed over to surefire...) - for this reason the method which picks up the overridden value for {{initScript}} treats the empty value as "not set" - since it is impossible to unset a property if it's empty... I always use some nonexistent initscript value ({{-DinitScript=asd.sql}}) to skip the initscript...now I know why - I wasn't aware that I've broken this... I think the reason behind the need to skip the initscript is because the test data-dependencies are "hidden", and everything is pushed into q_test_init.sql...there are many datasets in there which are rarely used; 1 test usually uses only 1 dataset...I've collected a few ideas which may help here: https://gist.github.com/kgyrtkirk/89931b7e420c4950b7b36cc8fc1cb6a2 > Add an option to skip init script while running tests > - > > Key: HIVE-11538 > URL: https://issues.apache.org/jira/browse/HIVE-11538 > Project: Hive > Issue Type: Improvement > Components: Testing Infrastructure >Reporter: Ashutosh Chauhan >Assignee: Ashutosh Chauhan > Fix For: 2.0.0 > > Attachments: HIVE-11538.2.patch, HIVE-11538.3.patch, HIVE-11538.patch > > > {{q_test_init.sql}} has grown over time. Now, it takes substantial amount of > time. When debugging a particular query which doesn't need such > initialization, this delay is annoyance. -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Assigned] (HIVE-16832) duplicate ROW__ID with split update
[ https://issues.apache.org/jira/browse/HIVE-16832?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Eugene Koifman reassigned HIVE-16832: - > duplicate ROW__ID with split update > --- > > Key: HIVE-16832 > URL: https://issues.apache.org/jira/browse/HIVE-16832 > Project: Hive > Issue Type: Bug > Components: Transactions >Affects Versions: 2.2.0 >Reporter: Eugene Koifman >Assignee: Eugene Koifman >Priority: Critical > -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Updated] (HIVE-15212) merge branch into master
[ https://issues.apache.org/jira/browse/HIVE-15212?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Wei Zheng updated HIVE-15212: - Attachment: HIVE-15212.07.patch > merge branch into master > > > Key: HIVE-15212 > URL: https://issues.apache.org/jira/browse/HIVE-15212 > Project: Hive > Issue Type: Sub-task >Reporter: Sergey Shelukhin >Assignee: Wei Zheng > Attachments: HIVE-15212.01.patch, HIVE-15212.02.patch, > HIVE-15212.03.patch, HIVE-15212.04.patch, HIVE-15212.05.patch, > HIVE-15212.06.patch, HIVE-15212.07.patch > > -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Updated] (HIVE-16826) Improvements for SeparatedValuesOutputFormat
[ https://issues.apache.org/jira/browse/HIVE-16826?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] BELUGA BEHR updated HIVE-16826: --- Status: Patch Available (was: Open) > Improvements for SeparatedValuesOutputFormat > > > Key: HIVE-16826 > URL: https://issues.apache.org/jira/browse/HIVE-16826 > Project: Hive > Issue Type: Improvement > Components: Beeline >Affects Versions: 2.1.1, 3.0.0 >Reporter: BELUGA BEHR >Priority: Minor > Attachments: HIVE-16826.1.patch, HIVE-16826.2.patch > > > Proposing changes to class > {{org.apache.hive.beeline.SeparatedValuesOutputFormat}}. > # Simplify the code > # Code currently creates and destroys {{CsvListWriter}}, which contains a > buffer, for every line printed > # Use Apache Commons libraries for certain actions > # Prefer non-synchronized {{StringBuilderWriter}} to Java's synchronized > {{StringWriter}} -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Updated] (HIVE-16826) Improvements for SeparatedValuesOutputFormat
[ https://issues.apache.org/jira/browse/HIVE-16826?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] BELUGA BEHR updated HIVE-16826: --- Attachment: HIVE-16826.2.patch Patch changed to reflect changes introduced to trunk > Improvements for SeparatedValuesOutputFormat > > > Key: HIVE-16826 > URL: https://issues.apache.org/jira/browse/HIVE-16826 > Project: Hive > Issue Type: Improvement > Components: Beeline >Affects Versions: 2.1.1, 3.0.0 >Reporter: BELUGA BEHR >Priority: Minor > Attachments: HIVE-16826.1.patch, HIVE-16826.2.patch > > > Proposing changes to class > {{org.apache.hive.beeline.SeparatedValuesOutputFormat}}. > # Simplify the code > # Code currently creates and destroys {{CsvListWriter}}, which contains a > buffer, for every line printed > # Use Apache Commons libraries for certain actions > # Prefer non-synchronized {{StringBuilderWriter}} to Java's synchronized > {{StringWriter}} -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Updated] (HIVE-16826) Improvements for SeparatedValuesOutputFormat
[ https://issues.apache.org/jira/browse/HIVE-16826?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] BELUGA BEHR updated HIVE-16826: --- Status: Open (was: Patch Available) > Improvements for SeparatedValuesOutputFormat > > > Key: HIVE-16826 > URL: https://issues.apache.org/jira/browse/HIVE-16826 > Project: Hive > Issue Type: Improvement > Components: Beeline >Affects Versions: 2.1.1, 3.0.0 >Reporter: BELUGA BEHR >Priority: Minor > Attachments: HIVE-16826.1.patch > > > Proposing changes to class > {{org.apache.hive.beeline.SeparatedValuesOutputFormat}}. > # Simplify the code > # Code currently creates and destroys {{CsvListWriter}}, which contains a > buffer, for every line printed > # Use Apache Commons libraries for certain actions > # Prefer non-synchronized {{StringBuilderWriter}} to Java's synchronized > {{StringWriter}} -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Updated] (HIVE-16806) Utilities isEmptyPath Loads All Files
[ https://issues.apache.org/jira/browse/HIVE-16806?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] BELUGA BEHR updated HIVE-16806: --- Status: Patch Available (was: Open) > Utilities isEmptyPath Loads All Files > - > > Key: HIVE-16806 > URL: https://issues.apache.org/jira/browse/HIVE-16806 > Project: Hive > Issue Type: Improvement >Affects Versions: 2.1.1, 3.0.0 >Reporter: BELUGA BEHR >Assignee: BELUGA BEHR >Priority: Minor > Attachments: HIVE-16806.1.patch > > > {code:title=org.apache.hadoop.hive.ql.exec.Utilities.isEmptyPath(Configuration, > Path)} > public static boolean isEmptyPath(Configuration job, Path dirPath) throws > IOException { > FileSystem inpFs = dirPath.getFileSystem(job); > try { > FileStatus[] fStats = inpFs.listStatus(dirPath, > FileUtils.HIDDEN_FILES_PATH_FILTER); > if (fStats.length > 0) { > return false; > } > } catch(FileNotFoundException fnf) { > return true; > } > return true; > } > {code} > You can see here that the code is loading every instance of {{FileStatus}} > even though all we care about here is if there are any. I propose adding a > new filter which stops collecting files into this array once it has found at > least one. -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Updated] (HIVE-16806) Utilities isEmptyPath Loads All Files
[ https://issues.apache.org/jira/browse/HIVE-16806?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] BELUGA BEHR updated HIVE-16806: --- Status: Open (was: Patch Available) > Utilities isEmptyPath Loads All Files > - > > Key: HIVE-16806 > URL: https://issues.apache.org/jira/browse/HIVE-16806 > Project: Hive > Issue Type: Improvement >Affects Versions: 2.1.1, 3.0.0 >Reporter: BELUGA BEHR >Assignee: BELUGA BEHR >Priority: Minor > Attachments: HIVE-16806.1.patch > > > {code:title=org.apache.hadoop.hive.ql.exec.Utilities.isEmptyPath(Configuration, > Path)} > public static boolean isEmptyPath(Configuration job, Path dirPath) throws > IOException { > FileSystem inpFs = dirPath.getFileSystem(job); > try { > FileStatus[] fStats = inpFs.listStatus(dirPath, > FileUtils.HIDDEN_FILES_PATH_FILTER); > if (fStats.length > 0) { > return false; > } > } catch(FileNotFoundException fnf) { > return true; > } > return true; > } > {code} > You can see here that the code is loading every instance of {{FileStatus}} > even though all we care about here is if there are any. I propose adding a > new filter which stops collecting files into this array once it has found at > least one. -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Commented] (HIVE-16831) Add unit tests for NPE fixes in HIVE-12054
[ https://issues.apache.org/jira/browse/HIVE-16831?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16039415#comment-16039415 ] Sunitha Beeram commented on HIVE-16831: --- +Watchers > Add unit tests for NPE fixes in HIVE-12054 > -- > > Key: HIVE-16831 > URL: https://issues.apache.org/jira/browse/HIVE-16831 > Project: Hive > Issue Type: Bug > Components: Hive >Reporter: Sunitha Beeram >Assignee: Sunitha Beeram > Attachments: HIVE-16831.1.patch > > > HIVE-12054 fixed NPE issues related to ObjectInspector which get triggered > when an empty ORC table/partition is read. > This work adds tests that trigger that path. -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Updated] (HIVE-16831) Add unit tests for NPE fixes in HIVE-12054
[ https://issues.apache.org/jira/browse/HIVE-16831?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Sunitha Beeram updated HIVE-16831: -- Status: Patch Available (was: In Progress) > Add unit tests for NPE fixes in HIVE-12054 > -- > > Key: HIVE-16831 > URL: https://issues.apache.org/jira/browse/HIVE-16831 > Project: Hive > Issue Type: Bug > Components: Hive >Reporter: Sunitha Beeram >Assignee: Sunitha Beeram > Attachments: HIVE-16831.1.patch > > > HIVE-12054 fixed NPE issues related to ObjectInspector which get triggered > when an empty ORC table/partition is read. > This work adds tests that trigger that path. -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Updated] (HIVE-16831) Add unit tests for NPE fixes in HIVE-12054
[ https://issues.apache.org/jira/browse/HIVE-16831?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Sunitha Beeram updated HIVE-16831: -- Attachment: HIVE-16831.1.patch > Add unit tests for NPE fixes in HIVE-12054 > -- > > Key: HIVE-16831 > URL: https://issues.apache.org/jira/browse/HIVE-16831 > Project: Hive > Issue Type: Bug > Components: Hive >Reporter: Sunitha Beeram >Assignee: Sunitha Beeram > Attachments: HIVE-16831.1.patch > > > HIVE-12054 fixed NPE issues related to ObjectInspector which get triggered > when an empty ORC table/partition is read. > This work adds tests that trigger that path. -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Work started] (HIVE-16831) Add unit tests for NPE fixes in HIVE-12054
[ https://issues.apache.org/jira/browse/HIVE-16831?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Work on HIVE-16831 started by Sunitha Beeram. - > Add unit tests for NPE fixes in HIVE-12054 > -- > > Key: HIVE-16831 > URL: https://issues.apache.org/jira/browse/HIVE-16831 > Project: Hive > Issue Type: Bug > Components: Hive >Reporter: Sunitha Beeram >Assignee: Sunitha Beeram > > HIVE-12054 fixed NPE issues related to ObjectInspector which get triggered > when an empty ORC table/partition is read. > This work adds tests that trigger that path. -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Assigned] (HIVE-16831) Add unit tests for NPE fixes in HIVE-12054
[ https://issues.apache.org/jira/browse/HIVE-16831?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Sunitha Beeram reassigned HIVE-16831: - Assignee: Sunitha Beeram > Add unit tests for NPE fixes in HIVE-12054 > -- > > Key: HIVE-16831 > URL: https://issues.apache.org/jira/browse/HIVE-16831 > Project: Hive > Issue Type: Bug > Components: Hive >Reporter: Sunitha Beeram >Assignee: Sunitha Beeram > > HIVE-12054 fixed NPE issues related to ObjectInspector which get triggered > when an empty ORC table/partition is read. > This work adds tests that trigger that path. -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Updated] (HIVE-16323) HS2 JDOPersistenceManagerFactory.pmCache leaks after HIVE-14204
[ https://issues.apache.org/jira/browse/HIVE-16323?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Daniel Dai updated HIVE-16323: -- Attachment: HIVE-16323.4.patch Good point to set pm to null. However, getPartition seems to involve a big block, and synchronizing it needs nontrivial code refactoring; I don't want to touch it for now. > HS2 JDOPersistenceManagerFactory.pmCache leaks after HIVE-14204 > --- > > Key: HIVE-16323 > URL: https://issues.apache.org/jira/browse/HIVE-16323 > Project: Hive > Issue Type: Bug > Components: HiveServer2 >Reporter: Daniel Dai >Assignee: Daniel Dai > Attachments: HIVE-16323.1.patch, HIVE-16323.2.patch, > HIVE-16323.3.patch, HIVE-16323.4.patch, PM_leak.png > > > Hive.loadDynamicPartitions creates threads with new embedded rawstore, but > never close them, thus we leak PersistenceManager one per such thread. -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Updated] (HIVE-16819) Add MM test for temporary table
[ https://issues.apache.org/jira/browse/HIVE-16819?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Wei Zheng updated HIVE-16819: - Fix Version/s: hive-14535 > Add MM test for temporary table > --- > > Key: HIVE-16819 > URL: https://issues.apache.org/jira/browse/HIVE-16819 > Project: Hive > Issue Type: Sub-task >Reporter: Wei Zheng >Assignee: Wei Zheng > Fix For: hive-14535 > > Attachments: HIVE-16819.patch > > -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Resolved] (HIVE-16819) Add MM test for temporary table
[ https://issues.apache.org/jira/browse/HIVE-16819?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Wei Zheng resolved HIVE-16819. -- Resolution: Fixed > Add MM test for temporary table > --- > > Key: HIVE-16819 > URL: https://issues.apache.org/jira/browse/HIVE-16819 > Project: Hive > Issue Type: Sub-task >Reporter: Wei Zheng >Assignee: Wei Zheng > Attachments: HIVE-16819.patch > > -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Updated] (HIVE-16819) Add MM test for temporary table
[ https://issues.apache.org/jira/browse/HIVE-16819?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Wei Zheng updated HIVE-16819: - Attachment: HIVE-16819.patch > Add MM test for temporary table > --- > > Key: HIVE-16819 > URL: https://issues.apache.org/jira/browse/HIVE-16819 > Project: Hive > Issue Type: Sub-task >Reporter: Wei Zheng >Assignee: Wei Zheng > Attachments: HIVE-16819.patch > > -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Updated] (HIVE-16829) A multiline/pretty-print JSON Data Record Reader
[ https://issues.apache.org/jira/browse/HIVE-16829?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Ujjawal Nayak updated HIVE-16829: - Priority: Trivial (was: Minor) > A multiline/pretty-print JSON Data Record Reader > > > Key: HIVE-16829 > URL: https://issues.apache.org/jira/browse/HIVE-16829 > Project: Hive > Issue Type: Improvement > Components: File Formats >Reporter: Ujjawal Nayak >Assignee: Ujjawal Nayak >Priority: Trivial > Original Estimate: 168h > Remaining Estimate: 168h > > We already have 'org.apache.hive.hcatalog.data.JsonSerDe' which can > serialize/deserialize a JSON Record. But it fails when it gets a JSON record > which is in pretty-print format. And we do not have a RecordReader which can > read this multiline JSON structure. > We should create a RecordReader which can create a record from pretty-print > format of JSON. -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Updated] (HIVE-16808) WebHCat statusdir parameter doesn't properly handle Unicode characters when using relative path
[ https://issues.apache.org/jira/browse/HIVE-16808?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Eugene Koifman updated HIVE-16808: -- Resolution: Fixed Fix Version/s: 3.0.0 Status: Resolved (was: Patch Available) > WebHCat statusdir parameter doesn't properly handle Unicode characters when > using relative path > --- > > Key: HIVE-16808 > URL: https://issues.apache.org/jira/browse/HIVE-16808 > Project: Hive > Issue Type: Bug > Components: WebHCat >Affects Versions: 1.0.0 >Reporter: Eugene Koifman >Assignee: Eugene Koifman > Fix For: 3.0.0 > > Attachments: HIVE-16808.01.patch, HIVE-16808.02.patch, > HIVE-16808.03.patch > > > {noformat} > curl http://.:20111/templeton/v1/hive?user.name=hive -d execute="select > count(*) from default.all100k" -d statusdir="/user/hive/düsseldorf7" > curl http://:20111/templeton/v1/hive?user.name=hive -d execute="select > count(*) from default.all100k" -d statusdir="/user/hive/䶴狝A﨩O" > {noformat} > will create statusdirs like so > {noformat} > /user/hive/düsseldorf-1 > drwxr-xr-x - hive hive 0 2017-06-01 19:01 /user/hive/düsseldorf7 > drwxr-xr-x - hive hive 0 2017-06-01 19:08 /user/hive/䶴狝A﨩O > {noformat} > but > {noformat} > curl http://.:20111/templeton/v1/hive?user.name=hive -d execute="select > count(*) from default.all100k" -d statusdir="düsseldorf7" > curl http://:20111/templeton/v1/hive?user.name=hive -d execute="select > count(*) from default.all100k" -d statusdir="䶴狝A﨩O" > {noformat} > Will create > {noformat} > drwxr-xr-x - hive hive 0 2017-06-01 00:27 > /user/hive/d%C3%BCsseldorf7 > drwxr-xr-x - hive hive 0 2017-06-01 22:33 > /user/hive/%E4%B6%B4%E7%8B%9DA%EF%A8%A9O > {noformat} -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Commented] (HIVE-16808) WebHCat statusdir parameter doesn't properly handle Unicode characters when using relative path
[ https://issues.apache.org/jira/browse/HIVE-16808?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16039199#comment-16039199 ] Eugene Koifman commented on HIVE-16808: --- Committed to master https://github.com/apache/hive/commit/ee3a3ed35b337ff13156c6c6e27dd332cdee2009 Thanks for the review [~pvary] and [~daijy] > WebHCat statusdir parameter doesn't properly handle Unicode characters when > using relative path > --- > > Key: HIVE-16808 > URL: https://issues.apache.org/jira/browse/HIVE-16808 > Project: Hive > Issue Type: Bug > Components: WebHCat >Affects Versions: 1.0.0 >Reporter: Eugene Koifman >Assignee: Eugene Koifman > Attachments: HIVE-16808.01.patch, HIVE-16808.02.patch, > HIVE-16808.03.patch > > > {noformat} > curl http://.:20111/templeton/v1/hive?user.name=hive -d execute="select > count(*) from default.all100k" -d statusdir="/user/hive/düsseldorf7" > curl http://:20111/templeton/v1/hive?user.name=hive -d execute="select > count(*) from default.all100k" -d statusdir="/user/hive/䶴狝A﨩O" > {noformat} > will create statusdirs like so > {noformat} > /user/hive/düsseldorf-1 > drwxr-xr-x - hive hive 0 2017-06-01 19:01 /user/hive/düsseldorf7 > drwxr-xr-x - hive hive 0 2017-06-01 19:08 /user/hive/䶴狝A﨩O > {noformat} > but > {noformat} > curl http://.:20111/templeton/v1/hive?user.name=hive -d execute="select > count(*) from default.all100k" -d statusdir="düsseldorf7" > curl http://:20111/templeton/v1/hive?user.name=hive -d execute="select > count(*) from default.all100k" -d statusdir="䶴狝A﨩O" > {noformat} > Will create > {noformat} > drwxr-xr-x - hive hive 0 2017-06-01 00:27 > /user/hive/d%C3%BCsseldorf7 > drwxr-xr-x - hive hive 0 2017-06-01 22:33 > /user/hive/%E4%B6%B4%E7%8B%9DA%EF%A8%A9O > {noformat} -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Commented] (HIVE-16829) A multiline/pretty-print JSON Data Record Reader
[ https://issues.apache.org/jira/browse/HIVE-16829?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16039160#comment-16039160 ] Carter Shanklin commented on HIVE-16829: This would be great to see (I see the ticket status is in progress). Also would be great if it didn't have any HCatalog dependencies like the current one seems to have. > A multiline/pretty-print JSON Data Record Reader > > > Key: HIVE-16829 > URL: https://issues.apache.org/jira/browse/HIVE-16829 > Project: Hive > Issue Type: Improvement > Components: File Formats >Reporter: Ujjawal Nayak >Assignee: Ujjawal Nayak >Priority: Minor > Original Estimate: 168h > Remaining Estimate: 168h > > We already have 'org.apache.hive.hcatalog.data.JsonSerDe' which can > serialize/deserialize a JSON Record. But it fails when it gets a JSON record > which is in pretty-print format. And we do not have a RecordReader which can > read this multiline JSON structure. > We should create a RecordReader which can create a record from pretty-print > format of JSON. -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Commented] (HIVE-12631) LLAP: support ORC ACID tables
[ https://issues.apache.org/jira/browse/HIVE-12631?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16039094#comment-16039094 ] Eugene Koifman commented on HIVE-12631: --- [~teddy.choi] could you clarify the changes in VectorizedOrcAcidRowBatchReader. It already returns data with Delete events applied. Why does OrcAcidEncodedDataConsumer do the same. for example {noformat} // we always want to read all the delete delta files. deleteEventReaderOptions.range(0, Long.MAX_VALUE); {noformat} seems like a bug for bug copy What exactly is being cached? > LLAP: support ORC ACID tables > - > > Key: HIVE-12631 > URL: https://issues.apache.org/jira/browse/HIVE-12631 > Project: Hive > Issue Type: Bug > Components: llap, Transactions >Reporter: Sergey Shelukhin >Assignee: Teddy Choi > Attachments: HIVE-12631.10.patch, HIVE-12631.1.patch, > HIVE-12631.2.patch, HIVE-12631.3.patch, HIVE-12631.4.patch, > HIVE-12631.5.patch, HIVE-12631.6.patch, HIVE-12631.7.patch, > HIVE-12631.8.patch, HIVE-12631.8.patch, HIVE-12631.9.patch > > > LLAP uses a completely separate read path in ORC to allow for caching and > parallelization of reads and processing. This path does not support ACID. As > far as I remember ACID logic is embedded inside ORC format; we need to > refactor it to be on top of some interface, if practical; or just port it to > LLAP read path. > Another consideration is how the logic will work with cache. The cache is > currently low-level (CB-level in ORC), so we could just use it to read bases > and deltas (deltas should be cached with higher priority) and merge as usual. > We could also cache merged representation in future. -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Commented] (HIVE-16828) With CBO enabled, Query on partitioned views throws IndexOutOfBoundException
[ https://issues.apache.org/jira/browse/HIVE-16828?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16039032#comment-16039032 ] Ashutosh Chauhan commented on HIVE-16828: - I tried your repro test case on master. It passes without your patch. So, it seems like this is no longer an issue on master. Seems like you have discovered this issue on some previous release. It might be worth investigating which subsequent patch on master fixed this issue and then backport that specific patch to the branch you discovered the issue on. > With CBO enabled, Query on partitioned views throws IndexOutOfBoundException > > > Key: HIVE-16828 > URL: https://issues.apache.org/jira/browse/HIVE-16828 > Project: Hive > Issue Type: Bug > Components: CBO >Affects Versions: 1.2.0, 2.1.1, 2.2.0 >Reporter: Adesh Kumar Rao > Attachments: HIVE-16828.patch > > > {code:java} > Caused by: java.lang.AssertionError: Internal error: While invoking method > 'public org.apache.calcite.sql2rel.RelFieldTrimmer$TrimResult > org.apache.calcite.sql2rel.RelFieldTrimmer.trimFields(org.apache.calcite.rel.core.Filter,org.apache.calcite.util.ImmutableBitSet,java.util.Set)' > at org.apache.calcite.util.Util.newInternal(Util.java:789) > at org.apache.calcite.util.ReflectUtil$2.invoke(ReflectUtil.java:534) > at > org.apache.calcite.sql2rel.RelFieldTrimmer.dispatchTrimFields(RelFieldTrimmer.java:270) > at > org.apache.calcite.sql2rel.RelFieldTrimmer.trimChild(RelFieldTrimmer.java:213) > at > org.apache.calcite.sql2rel.RelFieldTrimmer.trimFields(RelFieldTrimmer.java:374) > at > org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRelFieldTrimmer.trimFields(HiveRelFieldTrimmer.java:394) > ... 
98 more > Caused by: java.lang.reflect.InvocationTargetException > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:483) > at org.apache.calcite.util.ReflectUtil$2.invoke(ReflectUtil.java:531) > ... 102 more > Caused by: java.lang.AssertionError: Internal error: While invoking method > 'public org.apache.calcite.sql2rel.RelFieldTrimmer$TrimResult > org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRelFieldTrimmer.trimFields(org.apache.calcite.rel.core.Project,org.apache.calcite.util.ImmutableBitSet,java.util.Set)' > at org.apache.calcite.util.Util.newInternal(Util.java:789) > at org.apache.calcite.util.ReflectUtil$2.invoke(ReflectUtil.java:534) > at > org.apache.calcite.sql2rel.RelFieldTrimmer.dispatchTrimFields(RelFieldTrimmer.java:270) > at > org.apache.calcite.sql2rel.RelFieldTrimmer.trimChild(RelFieldTrimmer.java:213) > at > org.apache.calcite.sql2rel.RelFieldTrimmer.trimFields(RelFieldTrimmer.java:466) > ... 107 more > Caused by: java.lang.reflect.InvocationTargetException > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:483) > at org.apache.calcite.util.ReflectUtil$2.invoke(ReflectUtil.java:531) > ... 
110 more > Caused by: java.lang.IndexOutOfBoundsException: Index: 94, Size: 94 > at java.util.ArrayList.rangeCheck(ArrayList.java:653) > at java.util.ArrayList.get(ArrayList.java:429) > at > org.apache.hadoop.hive.ql.optimizer.calcite.rules.HiveRelFieldTrimmer.trimFields(HiveRelFieldTrimmer.java:390) > {code} > Reproducible case: > {code:java} > CREATE TABLE table1 (id int) PARTITIONED BY (year int) > -- create partitioned view > CREATE VIEW view1 partitioned on (year) as select id, year from table1; > set hive.cbo.enable=true; > select year from view1; > {code} -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Commented] (HIVE-16758) Better Select Number of Replications
[ https://issues.apache.org/jira/browse/HIVE-16758?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16039009#comment-16039009 ] BELUGA BEHR commented on HIVE-16758: All the test errors appear to be related to MapReduce or things that do not reference SparkHashTableSinkOperator > Better Select Number of Replications > > > Key: HIVE-16758 > URL: https://issues.apache.org/jira/browse/HIVE-16758 > Project: Hive > Issue Type: Improvement >Reporter: BELUGA BEHR >Priority: Minor > Attachments: HIVE-16758.1.patch > > > {{org.apache.hadoop.hive.ql.exec.SparkHashTableSinkOperator.java}} > We should be smarter about how we pick a replication number. We should add a > new configuration equivalent to {{mapreduce.client.submit.file.replication}}. > This value should be around the square root of the number of nodes and not > hard-coded in the code. > {code} > public static final String DFS_REPLICATION_MAX = "dfs.replication.max"; > private int minReplication = 10; > @Override > protected void initializeOp(Configuration hconf) throws HiveException { > ... > int dfsMaxReplication = hconf.getInt(DFS_REPLICATION_MAX, minReplication); > // minReplication value should not cross the value of dfs.replication.max > minReplication = Math.min(minReplication, dfsMaxReplication); > } > {code} > https://hadoop.apache.org/docs/r2.7.2/hadoop-mapreduce-client/hadoop-mapreduce-client-core/mapred-default.xml -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Commented] (HIVE-16805) Utilities isEmptyPath Logging Too Chatty and Uses Bad Format
[ https://issues.apache.org/jira/browse/HIVE-16805?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16039002#comment-16039002 ] BELUGA BEHR commented on HIVE-16805: Test failures look unrelated to me > Utilities isEmptyPath Logging Too Chatty and Uses Bad Format > > > Key: HIVE-16805 > URL: https://issues.apache.org/jira/browse/HIVE-16805 > Project: Hive > Issue Type: Improvement > Components: Query Processor >Affects Versions: 2.1.1, 3.0.0 >Reporter: BELUGA BEHR >Priority: Minor > Attachments: HIVE-16805.1.patch > > > {code:title=org.apache.hadoop.hive.ql.exec.Utilities} > public static boolean isEmptyPath(JobConf job, Path dirPath, Context ctx) > throws Exception { > if (ctx != null) { > ContentSummary cs = ctx.getCS(dirPath); > if (cs != null) { > LOG.info("Content Summary " + dirPath + "length: " + cs.getLength() + > " num files: " > + cs.getFileCount() + " num directories: " + > cs.getDirectoryCount()); > return (cs.getLength() == 0 && cs.getFileCount() == 0 && > cs.getDirectoryCount() <= 1); > } else { > LOG.info("Content Summary not cached for " + dirPath); > } > } > return isEmptyPath(job, dirPath); > } > {code} > # This is too chatty. Logging for caching is not necessary for INFO logging. > Move to DEBUG > # Use Log4J's {} format > # Make the messages more symmetrical - The success message doesn't include > the word "cache" in it at all > # Missing a space between the path and "length: " so the path is concatenated > with the string "length:" -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Commented] (HIVE-12631) LLAP: support ORC ACID tables
[ https://issues.apache.org/jira/browse/HIVE-12631?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16038951#comment-16038951 ] Hive QA commented on HIVE-12631: Here are the results of testing the latest attachment: https://issues.apache.org/jira/secure/attachment/12871585/HIVE-12631.10.patch {color:green}SUCCESS:{color} +1 due to 2 test(s) being added or modified. {color:red}ERROR:{color} -1 due to 7 failed/errored test(s), 10820 tests executed *Failed tests:* {noformat} org.apache.hadoop.hive.cli.TestBeeLineDriver.testCliDriver[create_merge_compressed] (batchId=237) org.apache.hadoop.hive.cli.TestHBaseCliDriver.testCliDriver[hbase_ppd_key_range] (batchId=93) org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver[orc_ppd_basic] (batchId=140) org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver[dynamic_semijoin_reduction_3] (batchId=158) org.apache.hadoop.hive.cli.TestPerfCliDriver.testCliDriver[query14] (batchId=232) org.apache.hadoop.hive.cli.TestPerfCliDriver.testCliDriver[query78] (batchId=232) org.apache.hadoop.hive.ql.io.orc.TestVectorizedOrcAcidRowBatchReader.testVectorizedOrcAcidRowBatchReader (batchId=261) {noformat} Test results: https://builds.apache.org/job/PreCommit-HIVE-Build/5548/testReport Console output: https://builds.apache.org/job/PreCommit-HIVE-Build/5548/console Test logs: http://104.198.109.242/logs/PreCommit-HIVE-Build-5548/ Messages: {noformat} Executing org.apache.hive.ptest.execution.TestCheckPhase Executing org.apache.hive.ptest.execution.PrepPhase Executing org.apache.hive.ptest.execution.ExecutionPhase Executing org.apache.hive.ptest.execution.ReportingPhase Tests exited with: TestsFailedException: 7 tests failed {noformat} This message is automatically generated. 
ATTACHMENT ID: 12871585 - PreCommit-HIVE-Build > LLAP: support ORC ACID tables > - > > Key: HIVE-12631 > URL: https://issues.apache.org/jira/browse/HIVE-12631 > Project: Hive > Issue Type: Bug > Components: llap, Transactions >Reporter: Sergey Shelukhin >Assignee: Teddy Choi > Attachments: HIVE-12631.10.patch, HIVE-12631.1.patch, > HIVE-12631.2.patch, HIVE-12631.3.patch, HIVE-12631.4.patch, > HIVE-12631.5.patch, HIVE-12631.6.patch, HIVE-12631.7.patch, > HIVE-12631.8.patch, HIVE-12631.8.patch, HIVE-12631.9.patch > > > LLAP uses a completely separate read path in ORC to allow for caching and > parallelization of reads and processing. This path does not support ACID. As > far as I remember ACID logic is embedded inside ORC format; we need to > refactor it to be on top of some interface, if practical; or just port it to > LLAP read path. > Another consideration is how the logic will work with cache. The cache is > currently low-level (CB-level in ORC), so we could just use it to read bases > and deltas (deltas should be cached with higher priority) and merge as usual. > We could also cache merged representation in future. -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Assigned] (HIVE-1627) Hive Join returns incorrect results if the join is (bigint = string)
[ https://issues.apache.org/jira/browse/HIVE-1627?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Fei Hui reassigned HIVE-1627: - Assignee: Fei Hui > Hive Join returns incorrect results if the join is (bigint = string) > > > Key: HIVE-1627 > URL: https://issues.apache.org/jira/browse/HIVE-1627 > Project: Hive > Issue Type: Bug > Components: Query Processor >Affects Versions: 0.5.0 >Reporter: Abhinav Gupta >Assignee: Fei Hui > > I was running a query joining on a bigint column with a string column. > The result was incorrect because only "16 bytes seemed to be compared". The > length of the value is more than 16 bytes when represented as base-10. > The problem was fixed once I changed the join to (bigint = cast (string as > bigint)) > Is the bug because of type conversion on join keys? -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Assigned] (HIVE-1627) Hive Join returns incorrect results if the join is (bigint = string)
[ https://issues.apache.org/jira/browse/HIVE-1627?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] kevy liu reassigned HIVE-1627: -- Assignee: kevy liu (was: Fei Hui) > Hive Join returns incorrect results if the join is (bigint = string) > > > Key: HIVE-1627 > URL: https://issues.apache.org/jira/browse/HIVE-1627 > Project: Hive > Issue Type: Bug > Components: Query Processor >Affects Versions: 0.5.0 >Reporter: Abhinav Gupta >Assignee: kevy liu > > I was running a query joining on a bigint column with a string column. > The result was incorrect because only "16 bytes seemed to be compared". The > length of the value is more than 16 bytes when represented as base-10. > The problem was fixed once I changed the join to (bigint = cast (string as > bigint)) > Is the bug because of type conversion on join keys? -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[jira] [Commented] (HIVE-15101) Spark client process can be stuck when UNHEALTHY NodeManager exists
[ https://issues.apache.org/jira/browse/HIVE-15101?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16038869#comment-16038869 ] Hive QA commented on HIVE-15101: Here are the results of testing the latest attachment: https://issues.apache.org/jira/secure/attachment/12837090/HIVE-15101.patch {color:red}ERROR:{color} -1 due to no test(s) being added or modified. {color:red}ERROR:{color} -1 due to 6 failed/errored test(s), 10820 tests executed *Failed tests:* {noformat} org.apache.hadoop.hive.cli.TestBeeLineDriver.testCliDriver[create_merge_compressed] (batchId=237) org.apache.hadoop.hive.cli.TestBeeLineDriver.testCliDriver[insert_overwrite_local_directory_1] (batchId=237) org.apache.hadoop.hive.cli.TestMiniLlapCliDriver.testCliDriver[orc_ppd_basic] (batchId=140) org.apache.hadoop.hive.cli.TestPerfCliDriver.testCliDriver[query14] (batchId=232) org.apache.hadoop.hive.cli.TestPerfCliDriver.testCliDriver[query23] (batchId=232) org.apache.hadoop.hive.cli.TestPerfCliDriver.testCliDriver[query78] (batchId=232) {noformat} Test results: https://builds.apache.org/job/PreCommit-HIVE-Build/5547/testReport Console output: https://builds.apache.org/job/PreCommit-HIVE-Build/5547/console Test logs: http://104.198.109.242/logs/PreCommit-HIVE-Build-5547/ Messages: {noformat} Executing org.apache.hive.ptest.execution.TestCheckPhase Executing org.apache.hive.ptest.execution.PrepPhase Executing org.apache.hive.ptest.execution.ExecutionPhase Executing org.apache.hive.ptest.execution.ReportingPhase Tests exited with: TestsFailedException: 6 tests failed {noformat} This message is automatically generated. 
ATTACHMENT ID: 12837090 - PreCommit-HIVE-Build > Spark client process can be stuck when UNHEALTHY NodeManager exists > --- > > Key: HIVE-15101 > URL: https://issues.apache.org/jira/browse/HIVE-15101 > Project: Hive > Issue Type: Bug > Components: Spark >Affects Versions: 2.0.0, 2.1.0 > Environment: Hive 2.1.0 > Spark 1.6.2 >Reporter: Satoshi Iijima >Assignee: Satoshi Iijima > Attachments: hadoop-yarn-nodemanager.log, HIVE-15101.patch, > hive.log.gz > > > When a Hive-on-Spark job is executed on YARN environment where UNHEALTHY > NodeManager exists, Spark client can be stuck in RUNNING state. > thread dump: > {code} > "008ee7b6-b083-4ac9-ae1c-b6097d9bf761 main" #1 prio=5 os_prio=0 > tid=0x7f14f4013800 nid=0x3855 in Object.wait() [0x7f14fd9b1000] >java.lang.Thread.State: WAITING (on object monitor) > at java.lang.Object.wait(Native Method) > - waiting on <0xf6615550> (a > io.netty.util.concurrent.DefaultPromise) > at java.lang.Object.wait(Object.java:502) > at > io.netty.util.concurrent.DefaultPromise.await(DefaultPromise.java:254) > - locked <0xf6615550> (a > io.netty.util.concurrent.DefaultPromise) > at io.netty.util.concurrent.DefaultPromise.await(DefaultPromise.java:32) > at io.netty.util.concurrent.AbstractFuture.get(AbstractFuture.java:31) > at > org.apache.hive.spark.client.SparkClientImpl.(SparkClientImpl.java:104) > at > org.apache.hive.spark.client.SparkClientFactory.createClient(SparkClientFactory.java:80) > - locked <0xf21b8e08> (a java.lang.Class for > org.apache.hive.spark.client.SparkClientFactory) > at > org.apache.hadoop.hive.ql.exec.spark.RemoteHiveSparkClient.createRemoteClient(RemoteHiveSparkClient.java:99) > at > org.apache.hadoop.hive.ql.exec.spark.RemoteHiveSparkClient.(RemoteHiveSparkClient.java:95) > at > org.apache.hadoop.hive.ql.exec.spark.HiveSparkClientFactory.createHiveSparkClient(HiveSparkClientFactory.java:67) > at > org.apache.hadoop.hive.ql.exec.spark.session.SparkSessionImpl.open(SparkSessionImpl.java:62) > at > 
org.apache.hadoop.hive.ql.exec.spark.session.SparkSessionManagerImpl.getSession(SparkSessionManagerImpl.java:114) > at > org.apache.hadoop.hive.ql.exec.spark.SparkUtilities.getSparkSession(SparkUtilities.java:136) > at > org.apache.hadoop.hive.ql.exec.spark.SparkTask.execute(SparkTask.java:89) > at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:197) > at > org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:100) > at org.apache.hadoop.hive.ql.Driver.launchTask(Driver.java:1858) > at org.apache.hadoop.hive.ql.Driver.execute(Driver.java:1562) > at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:1313) > at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1084) > at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1072) > at > org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:232) > at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:183) > at