[hive] branch master updated: HIVE-22164: Vectorized Limit operator returns wrong number of results with offset (Ramesh Kumar Thangarajan, reviewed by Vineet Garg)

vgarg Mon, 09 Sep 2019 22:06:07 -0700

This is an automated email from the ASF dual-hosted git repository.

vgarg pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git



The following commit(s) were added to refs/heads/master by this push:
     new 1dfa2d8  HIVE-22164: Vectorized Limit operator returns wrong number of results with offset (Ramesh Kumar Thangarajan, reviewed by Vineet Garg)
1dfa2d8 is described below

commit 1dfa2d8c666617cf148c9e5831390d1635649ff0
Author: Ramesh Kumar Thangarajan <rameshku...@cloudera.com>
AuthorDate: Mon Sep 9 22:02:54 2019 -0700

    HIVE-22164: Vectorized Limit operator returns wrong number of results with offset (Ramesh Kumar Thangarajan, reviewed by Vineet Garg)
---
 .../queries/clientpositive/vector_offset_limit.q   | 34 ++++++++
 .../clientpositive/llap/vector_offset_limit.q.out  | 95 ++++++++++++++++++++++
 2 files changed, 129 insertions(+)

diff --git a/ql/src/test/queries/clientpositive/vector_offset_limit.q b/ql/src/test/queries/clientpositive/vector_offset_limit.q
new file mode 100644
index 0000000..dd6a3e7
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/vector_offset_limit.q
@@ -0,0 +1,34 @@
+set hive.auto.convert.join=true;
+set hive.auto.convert.sortmerge.join=true;
+set hive.default.rcfile.serde=org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe;
+set hive.enforce.sortmergebucketmapjoin=true;
+set hive.exec.reducers.bytes.per.reducer=67108864;
+set hive.fetch.output.serde=org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
+set hive.limit.optimize.enable=true;
+set hive.limit.pushdown.memory.usage=0.04;
+set hive.llap.io.enabled=true;
+set hive.map.aggr.hash.min.reduction=0.99;
+set hive.mapjoin.bucket.cache.size=10000;
+set hive.mapjoin.hybridgrace.hashtable=false;
+set hive.merge.mapfiles=false;
+set hive.merge.nway.joins=false;
+set hive.optimize.bucketmapjoin=true;
+set hive.optimize.index.filter=true;
+set hive.stats.fetch.bitvector=false;
+set hive.stats.fetch.column.stats=true;
+set hive.support.quoted.identifiers=none;
+set hive.tez.auto.reducer.parallelism=true;
+set hive.tez.bucket.pruning=true;
+set hive.vectorized.execution.enabled=true;
+set hive.vectorized.execution.mapjoin.minmax.enabled=true;
+set hive.vectorized.execution.mapjoin.native.fast.hashtable.enabled=true;
+set hive.vectorized.groupby.checkinterval=4096;
+
+drop table if exists TLIMITOFFSET;
+create table if not exists TLIMITOFFSET (name string, id int, flag string) STORED AS orc;
+create table if not exists TLIMITOFFSETSTAGE (name string, id int, flag string) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE ;
+LOAD DATA LOCAL INPATH '../../data/files/tjoin3.txt' OVERWRITE INTO TABLE TLIMITOFFSETSTAGE;
+INSERT INTO TABLE TLIMITOFFSET SELECT * from TLIMITOFFSETSTAGE;
+
+SELECT name,id FROM TLIMITOFFSET where name='testname' ORDER BY id LIMIT 20;
+SELECT name,id FROM TLIMITOFFSET where name='testname' ORDER BY id LIMIT 20 OFFSET 10;
diff --git a/ql/src/test/results/clientpositive/llap/vector_offset_limit.q.out b/ql/src/test/results/clientpositive/llap/vector_offset_limit.q.out
new file mode 100644
index 0000000..06c9682
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/vector_offset_limit.q.out
@@ -0,0 +1,95 @@
+PREHOOK: query: drop table if exists TLIMITOFFSET
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists TLIMITOFFSET
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table if not exists TLIMITOFFSET (name string, id int, flag string) STORED AS orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@TLIMITOFFSET
+POSTHOOK: query: create table if not exists TLIMITOFFSET (name string, id int, flag string) STORED AS orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@TLIMITOFFSET
+PREHOOK: query: create table if not exists TLIMITOFFSETSTAGE (name string, id int, flag string) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@TLIMITOFFSETSTAGE
+POSTHOOK: query: create table if not exists TLIMITOFFSETSTAGE (name string, id int, flag string) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@TLIMITOFFSETSTAGE
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tjoin3.txt' OVERWRITE INTO TABLE TLIMITOFFSETSTAGE
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@tlimitoffsetstage
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tjoin3.txt' OVERWRITE INTO TABLE TLIMITOFFSETSTAGE
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@tlimitoffsetstage
+PREHOOK: query: INSERT INTO TABLE TLIMITOFFSET SELECT * from TLIMITOFFSETSTAGE
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tlimitoffsetstage
+PREHOOK: Output: default@tlimitoffset
+POSTHOOK: query: INSERT INTO TABLE TLIMITOFFSET SELECT * from TLIMITOFFSETSTAGE
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tlimitoffsetstage
+POSTHOOK: Output: default@tlimitoffset
+POSTHOOK: Lineage: tlimitoffset.flag SIMPLE [(tlimitoffsetstage)tlimitoffsetstage.FieldSchema(name:flag, type:string, comment:null), ]
+POSTHOOK: Lineage: tlimitoffset.id SIMPLE [(tlimitoffsetstage)tlimitoffsetstage.FieldSchema(name:id, type:int, comment:null), ]
+POSTHOOK: Lineage: tlimitoffset.name SIMPLE [(tlimitoffsetstage)tlimitoffsetstage.FieldSchema(name:name, type:string, comment:null), ]
+PREHOOK: query: SELECT name,id FROM TLIMITOFFSET where name='testname' ORDER BY id LIMIT 20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tlimitoffset
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: SELECT name,id FROM TLIMITOFFSET where name='testname' ORDER BY id LIMIT 20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tlimitoffset
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+testname	1
+testname	2
+testname	3
+testname	4
+testname	5
+testname	6
+testname	7
+testname	8
+testname	9
+testname	10
+testname	11
+testname	12
+testname	13
+testname	14
+testname	15
+testname	16
+testname	17
+testname	18
+testname	19
+testname	20
+PREHOOK: query: SELECT name,id FROM TLIMITOFFSET where name='testname' ORDER BY id LIMIT 20 OFFSET 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tlimitoffset
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: SELECT name,id FROM TLIMITOFFSET where name='testname' ORDER BY id LIMIT 20 OFFSET 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tlimitoffset
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+testname	11
+testname	12
+testname	13
+testname	14
+testname	15
+testname	16
+testname	17
+testname	18
+testname	19
+testname	20
+testname	21
+testname	22
+testname	23
+testname	24
+testname	25
+testname	26
+testname	27
+testname	28
+testname	29
+testname	30

[hive] branch master updated: HIVE-22164: Vectorized Limit operator returns wrong number of results with offset (Ramesh Kumar Thangarajan, reviewed by Vineet Garg)

Reply via email to