[ https://issues.apache.org/jira/browse/HIVE-10417?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14506650#comment-14506650 ]
Nemon Lou commented on HIVE-10417: ---------------------------------- Issue 1 raises the following error and makes sampling fail: {quote} 2015-04-21 17:23:48,425 | ERROR | HiveServer2-Background-Pool: Thread-771 | Sampling error | org.apache.hadoop.hive.ql.exec.mr.ExecDriver.execute(ExecDriver.java:389) java.io.IOException: java.util.NoSuchElementException at org.apache.hadoop.hive.ql.exec.FetchOperator.getNextRow(FetchOperator.java:636) at org.apache.hadoop.hive.ql.exec.FetchOperator.pushRow(FetchOperator.java:534) at org.apache.hadoop.hive.ql.exec.PartitionKeySampler$FetchSampler.pushRow(PartitionKeySampler.java:153) at org.apache.hadoop.hive.ql.exec.mr.ExecDriver.handleSampling(ExecDriver.java:552) at org.apache.hadoop.hive.ql.exec.mr.ExecDriver.execute(ExecDriver.java:382) at org.apache.hadoop.hive.ql.exec.mr.MapRedTask.execute(MapRedTask.java:160) at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:153) at org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:85) at org.apache.hadoop.hive.ql.Driver.launchTask(Driver.java:1549) at org.apache.hadoop.hive.ql.Driver.execute(Driver.java:1318) at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:1136) at org.apache.hadoop.hive.ql.Driver.run(Driver.java:959) at org.apache.hadoop.hive.ql.Driver.run(Driver.java:954) at org.apache.hive.service.cli.operation.SQLOperation.runInternal(SQLOperation.java:157) at org.apache.hive.service.cli.operation.SQLOperation.access$000(SQLOperation.java:72) at org.apache.hive.service.cli.operation.SQLOperation$1$1.run(SQLOperation.java:231) at java.security.AccessController.doPrivileged(Native Method) at javax.security.auth.Subject.doAs(Subject.java:415) at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1612) at org.apache.hadoop.hive.shims.HadoopShimsSecure.doAs(HadoopShimsSecure.java:493) at org.apache.hive.service.cli.operation.SQLOperation$1.run(SQLOperation.java:241) at 
java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:471) at java.util.concurrent.FutureTask.run(FutureTask.java:262) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) at java.lang.Thread.run(Thread.java:745) Caused by: java.util.NoSuchElementException at java.util.ArrayList$Itr.next(ArrayList.java:834) at org.apache.hadoop.hive.ql.exec.FetchOperator.getNextPath(FetchOperator.java:364) at org.apache.hadoop.hive.ql.exec.FetchOperator.getRecordReader(FetchOperator.java:389) at org.apache.hadoop.hive.ql.exec.FetchOperator.getRecordReader(FetchOperator.java:465) at org.apache.hadoop.hive.ql.exec.FetchOperator.getNextRow(FetchOperator.java:561) ... 25 more {quote} > Parallel Order By return wrong results for partitioned tables > ------------------------------------------------------------- > > Key: HIVE-10417 > URL: https://issues.apache.org/jira/browse/HIVE-10417 > Project: Hive > Issue Type: Bug > Affects Versions: 0.14.0, 0.13.1, 1.0.0 > Reporter: Nemon Lou > Assignee: Nemon Lou > > The following script reproduces this bug. 
> set hive.optimize.sampling.orderby=true; > set mapreduce.job.reduces=10; > select * from src order by key desc limit 10; > +--------------+----------------+ > | src.key | src.value | > +--------------+----------------+ > | 98 | val_98 | > | 98 | val_98 | > | 97 | val_97 | > | 97 | val_97 | > | 96 | val_96 | > | 95 | val_95 | > | 95 | val_95 | > | 92 | val_92 | > | 90 | val_90 | > | 90 | val_90 | > +--------------+----------------+ > 10 rows selected (47.916 seconds) > reset; > create table src_orc_p (key string ,value string ) > partitioned by (kp string) > stored as orc > tblproperties("orc.compress"="SNAPPY"); > set hive.exec.dynamic.partition.mode=nonstrict; > set hive.exec.max.dynamic.partitions.pernode=10000; > set hive.exec.max.dynamic.partitions=10000; > insert into table src_orc_p partition(kp) select *,substring(key,1) from src > distribute by substring(key,1); > set mapreduce.job.reduces=10; > set hive.optimize.sampling.orderby=true; > select * from src_orc_p order by key desc limit 10; > +----------------+------------------+-----------------+ > | src_orc_p.key | src_orc_p.value | src_orc_p.kend | > +----------------+------------------+-----------------+ > | 0 | val_0 | 0 | > | 0 | val_0 | 0 | > | 0 | val_0 | 0 | > +----------------+------------------+-----------------+ > 3 rows selected (39.861 seconds) -- This message was sent by Atlassian JIRA (v6.3.4#6332)