This is an automated email from the ASF dual-hosted git repository. vgarg pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
     new 38574e3 HIVE-22164: Vectorized Limit operator returns wrong number of results with offset (Ramesh Kumar Thangarajan, reviewed by Vineet Garg)
38574e3 is described below

commit 38574e3038dfdbf1c0b8b2c44bf4e94a281668d2
Author: Ramesh Kumar Thangarajan <rameshku...@cloudera.com>
AuthorDate: Mon Sep 9 22:02:54 2019 -0700

    HIVE-22164: Vectorized Limit operator returns wrong number of results with offset (Ramesh Kumar Thangarajan, reviewed by Vineet Garg)
---
 itests/src/test/resources/testconfiguration.properties | 3 ++-
 .../org/apache/hadoop/hive/ql/exec/vector/VectorLimitOperator.java | 4 +++-
 .../results/clientpositive/llap/vector_windowing_order_null.q.out | 2 +-
 3 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 98280c5..34d0e27 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -425,7 +425,8 @@ minillap.query.files=acid_bucket_pruning.q,\
   load_fs2.q,\
   llap_stats.q,\
   multi_count_distinct_null.q,\
-  cttl.q
+  cttl.q,\
+  vector_offset_limit.q
 
 minillaplocal.query.files=\
   bucket_num_reducers_acid.q,\
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorLimitOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorLimitOperator.java
index 918a69a..79b073f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorLimitOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorLimitOperator.java
@@ -66,7 +66,9 @@ public class VectorLimitOperator extends LimitOperator implements VectorizationO
   public void process(Object row, int tag) throws HiveException {
     VectorizedRowBatch batch = (VectorizedRowBatch) row;
 
-    if (currCount + batch.size < offset) {
+    // We should skip number of rows equal to offset value
+    // skip until sum of current read count and current batch size less than or equal offset value
+    if (currCount + batch.size <= offset) {
       currCount += batch.size;
     } else if (currCount >= offset + limit) {
       setDone(true);
diff --git a/ql/src/test/results/clientpositive/llap/vector_windowing_order_null.q.out b/ql/src/test/results/clientpositive/llap/vector_windowing_order_null.q.out
index c6c39ed..e6a6a94 100644
--- a/ql/src/test/results/clientpositive/llap/vector_windowing_order_null.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_windowing_order_null.q.out
@@ -827,7 +827,6 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@over10k_n21
 #### A masked pattern was here ####
 ts s sum_window_0
-2013-03-01 09:11:58.70307 calvin laertes 197097
 2013-03-01 09:11:58.70307 calvin steinbeck 262874
 2013-03-01 09:11:58.70307 david falkner 328506
 2013-03-01 09:11:58.70307 fred nixon 394118
@@ -837,6 +836,7 @@ ts s sum_window_0
 2013-03-01 09:11:58.70307 jessica laertes 656771
 2013-03-01 09:11:58.70307 jessica polk 722558
 2013-03-01 09:11:58.70307 katie king 788310
+2013-03-01 09:11:58.70307 katie white 853920
 PREHOOK: query: explain vectorization detail select s, i, round(sum(d) over (partition by s order by i desc nulls last) , 3) from over10k_n21 limit 5
 PREHOOK: type: QUERY